/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir_builder.h"

/* This pass updates the block index in the resource_intel intrinsics if the
 * array index is constant.
 *
 * This pass must be run before anv_nir_compute_push_layout().
 */
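/* For reference (inferred from how the sources are used below, not an
 * authoritative definition of the intrinsic): resource_intel carries
 *
 *    src[0] = descriptor set offset
 *    src[1] = binding/surface offset within the descriptor set
 *    src[2] = array index within the binding
 */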
static bool
update_resource_intel_block(nir_builder *b, nir_intrinsic_instr *intrin,
                            UNUSED void *data)
{
   if (intrin->intrinsic != nir_intrinsic_resource_intel)
      return false;

   /* If the array index into the descriptor binding is not constant, we
    * won't be able to turn this load_ubo into a push constant. In that
    * case, and also when the resource is not marked pushable, set the
    * block to 0xffffffff and clear the pushable flag.
    *
    * Otherwise, update the block index by adding the array index so that
    * when anv_nir_compute_push_layout() uses the block value, it picks the
    * right surface in the binding's array.
    */
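   /* Illustrative example (made-up numbers, not from a real shader): an
    * intrinsic with resource_block_intel == 2 and a constant array index
    * of 3 in src[2] comes out of this pass with resource_block_intel == 5,
    * which is the entry anv_nir_compute_push_layout() should push.
    */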
   if (!nir_src_is_const(intrin->src[2]) ||
       !(nir_intrinsic_resource_access_intel(intrin) &
         nir_resource_intel_pushable)) {
      nir_intrinsic_set_resource_block_intel(intrin, 0xffffffff);
      nir_intrinsic_set_resource_access_intel(
         intrin,
         nir_intrinsic_resource_access_intel(intrin) &
         ~nir_resource_intel_pushable);
   } else {
      nir_intrinsic_set_resource_block_intel(
         intrin,
         nir_intrinsic_resource_block_intel(intrin) +
         nir_src_as_uint(intrin->src[2]));
   }

   return true;
}

bool
anv_nir_update_resource_intel_block(nir_shader *shader)
{
   return nir_shader_intrinsics_pass(shader, update_resource_intel_block,
                                     nir_metadata_all,
                                     NULL);
}

struct lower_resource_state {
   enum anv_descriptor_set_layout_type desc_type;
   const struct anv_physical_device *device;
};

/* This pass lowers the surface index source of resource_intel intrinsics,
 * combining the descriptor set offset with the surface offset within the
 * descriptor set.
 *
 * This pass must be run after anv_nir_compute_push_layout() because the
 * push constant selection relies on seeing whether the surface offset is
 * constant. Once the two offsets are combined, that constant detection no
 * longer works.
 */
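/* Illustrative before/after of the sources (a sketch with made-up values,
 * not dumped from a real shader):
 *
 *    before: resource_intel(set_offset, binding_offset, array_index, ...)
 *    after:  resource_intel(0xdeaddeed,
 *                           set_offset + (binding_offset << 6),
 *                           0xdeaddeed, ...)
 *
 * The << 6 only applies to non-sampler handles on platforms without
 * ex_bso; see the comment inside the pass.
 */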
static bool
lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   if (intrin->intrinsic != nir_intrinsic_resource_intel)
      return false;

   const bool is_bindless =
      (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_bindless) != 0;
   const bool is_sampler =
      (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_sampler) != 0;
   const bool is_embedded_sampler =
      (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_sampler_embedded) != 0;
   const struct lower_resource_state *state = data;

   /* Ignore binding table accesses & embedded samplers */
   if (is_embedded_sampler) {
      assert(state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER);
      return false;
   }

   if (!is_bindless)
      return true;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *set_offset = intrin->src[0].ssa;
   nir_def *binding_offset = intrin->src[1].ssa;

   /* When using indirect descriptors, the surface handles are loaded from
    * the descriptor buffer and do not need any offset.
    */
   if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
       state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
      if (!state->device->uses_ex_bso) {
         /* We're trying to reduce the number of instructions in the shaders
          * to compute surface handles. The assumption is that we're using
          * more surface handles than sampler handles (UBO, SSBO, images,
          * etc...) so it's worth optimizing that case.
          *
          * On platforms prior to ex_bso, surface handles in the extended
          * descriptor message have to be shifted left by 6 (bits 31:12 of
          * the extended descriptor match bits 25:6 of the surface handle).
          * We have to combine 2 parts in the shader to build the final
          * surface handle: the base offset of the descriptor set (in the
          * push constant, located in resource_intel::src[0]) and the
          * relative descriptor offset (resource_intel::src[1]).
          *
          * For convenience, up to this point, resource_intel::src[1] is in
          * bytes. We now have to shift it left by 6 to match the shift
          * already applied to the push constant value provided in
          * resource_intel::src[0]. That way the shader can do a single ADD
          * and get the surface handle.
          */
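         /* Worked example (made-up offsets): with a descriptor set whose
          * base handle in the push constant is 0x100 << 6 and a surface at
          * byte offset 64 within the set, the shader computes
          *
          *    (0x100 << 6) + (64 << 6) == (0x100 + 64) << 6
          *
          * with the single ADD emitted below.
          */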
         if (!is_sampler)
            binding_offset = nir_ishl_imm(b, binding_offset, 6);
      }

      nir_src_rewrite(&intrin->src[1],
                      nir_iadd(b, set_offset, binding_offset));
   }

   /* Mark the now unused values (set offset, array index) with an
    * obviously bogus constant.
    */
   nir_src_rewrite(&intrin->src[0], nir_imm_int(b, 0xdeaddeed));
   nir_src_rewrite(&intrin->src[2], nir_imm_int(b, 0xdeaddeed));

   return true;
}

bool
anv_nir_lower_resource_intel(nir_shader *shader,
                             const struct anv_physical_device *device,
                             enum anv_descriptor_set_layout_type desc_type)
{
   struct lower_resource_state state = {
      .desc_type = desc_type,
      .device = device,
   };
   return nir_shader_intrinsics_pass(shader, lower_resource_intel,
                                     nir_metadata_control_flow,
                                     &state);
}
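
/* A sketch of the intended ordering, per the pass comments above (not the
 * exact call sites in the driver):
 *
 *    anv_nir_update_resource_intel_block(nir);
 *    ... anv_nir_compute_push_layout(...) ...
 *    anv_nir_lower_resource_intel(nir, pdevice, layout_type);
 */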