/*
 * Copyright (c) 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "intel_nir.h"
#include "compiler/nir/nir_builder.h"

/*
 * This pass lowers a few of the sparse instructions to something the HW can
 * handle.
 *
 * The image_*_sparse_load intrinsics are lowered into 2 instructions, a
 * regular image_*_load intrinsic and a sparse texture txf operation, and the
 * sparse vector of the original intrinsic is rebuilt from the 2 new values.
 * We need to do this because our backend implements image load/store using
 * the dataport, and the dataport unit doesn't provide residency information;
 * we have to use the sampler for residency.
 *
 * The is_sparse_texels_resident intrinsic is lowered to a bit-checking
 * operation, as the data reported by the sampler is a single bit per lane in
 * the first component.
 *
 * The tex_* instructions with a compare value need to be lowered into 2
 * instructions due to a HW limitation:
 *
 * SKL PRMs, Volume 7: 3D-Media-GPGPU, Messages, SIMD Payloads:
 *
 *    "The Pixel Null Mask field, when enabled via the Pixel Null Mask Enable
 *     will be incorect for sample_c when applied to a surface with 64-bit per
 *     texel format such as R16G16BA16_UNORM. Pixel Null mask Enable may
 *     incorrectly report pixels as referencing a Null surface."
 */

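/* Illustrative sketch of the image_*_sparse_load lowering (pseudo-NIR, not
 * the exact output of this pass):
 *
 *    vec5 = @image_sparse_load(handle, coord, sample, lod)
 *
 * becomes roughly:
 *
 *    data      = @image_load(handle, coord, sample, lod)   (4 components)
 *    residency = last component of a sparse txf on the same handle/coord
 *    vec5      = vec5(data.x, data.y, data.z, data.w, residency)
 */
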
static void
lower_is_sparse_texels_resident(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_instr_remove(&intrin->instr);

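   /* The residency code holds one bit per subgroup invocation; test the bit
    * corresponding to the current lane.
    */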
   nir_def_rewrite_uses(
      &intrin->def,
      nir_i2b(b, nir_iand(b, intrin->src[0].ssa,
                              nir_ishl(b, nir_imm_int(b, 1),
                                          nir_load_subgroup_invocation(b)))));
}

static void
lower_sparse_residency_code_and(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_instr_remove(&intrin->instr);

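   /* ANDing two residency codes yields a code whose per-lane bit is set only
    * if both inputs reported the texel as resident.
    */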
   nir_def_rewrite_uses(
      &intrin->def,
      nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa));
}

static void
lower_sparse_image_load(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *img_load;
   nir_intrinsic_instr *new_intrin;
   if (intrin->intrinsic == nir_intrinsic_image_sparse_load) {
      img_load = nir_image_load(b,
                                intrin->num_components - 1,
                                intrin->def.bit_size,
                                intrin->src[0].ssa,
                                intrin->src[1].ssa,
                                intrin->src[2].ssa,
                                intrin->src[3].ssa);
      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
      nir_intrinsic_set_range_base(new_intrin, nir_intrinsic_range_base(intrin));
   } else {
      img_load = nir_bindless_image_load(b,
                                         intrin->num_components - 1,
                                         intrin->def.bit_size,
                                         intrin->src[0].ssa,
                                         intrin->src[1].ssa,
                                         intrin->src[2].ssa,
                                         intrin->src[3].ssa);
      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
   }

   nir_intrinsic_set_image_array(new_intrin, nir_intrinsic_image_array(intrin));
   nir_intrinsic_set_image_dim(new_intrin, nir_intrinsic_image_dim(intrin));
   nir_intrinsic_set_format(new_intrin, nir_intrinsic_format(intrin));
   nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin));
   nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin));

   nir_def *dests[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < intrin->num_components - 1; i++) {
      dests[i] = nir_channel(b, img_load, i);
   }

   /* Use texture instruction to compute residency */
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);

   tex->op = nir_texop_txf;
   /* We don't care about the dest type since we're not using any of that
    * data.
    */
   tex->dest_type = nir_type_float32;
   tex->is_array = nir_intrinsic_image_array(intrin);
   tex->is_shadow = false;
   tex->sampler_index = 0;
   tex->is_sparse = true;

   tex->src[0].src_type = intrin->intrinsic == nir_intrinsic_image_sparse_load ?
                          nir_tex_src_texture_offset :
                          nir_tex_src_texture_handle;
   tex->src[0].src = nir_src_for_ssa(intrin->src[0].ssa);

   tex->coord_components = nir_image_intrinsic_coord_components(intrin);
   nir_def *coord;
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
       nir_intrinsic_image_array(intrin)) {
      tex->coord_components++;

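      /* Image intrinsics address cube arrays with a flat layer index
       * (6 * slice + face), while the sampler expects separate face and
       * array-slice coordinates, so split the layer back into the two.
       */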
      nir_def *img_layer = nir_channel(b, intrin->src[1].ssa, 2);
      nir_def *tex_slice = nir_idiv(b, img_layer, nir_imm_int(b, 6));
      nir_def *tex_face =
         nir_iadd(b, img_layer, nir_ineg(b, nir_imul_imm(b, tex_slice, 6)));
      nir_def *comps[4] = {
         nir_channel(b, intrin->src[1].ssa, 0),
         nir_channel(b, intrin->src[1].ssa, 1),
         tex_face,
         tex_slice
      };
      coord = nir_vec(b, comps, 4);
   } else {
      coord = nir_channels(b, intrin->src[1].ssa,
                           nir_component_mask(tex->coord_components));
   }
   tex->src[1].src_type = nir_tex_src_coord;
   tex->src[1].src = nir_src_for_ssa(coord);

   tex->src[2].src_type = nir_tex_src_lod;
   tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));

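   /* Sparse txf returns 4 data components plus the residency code as a 5th
    * component.
    */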
   nir_def_init(&tex->instr, &tex->def, 5,
                intrin->def.bit_size);

   nir_builder_instr_insert(b, &tex->instr);

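   /* Rebuild the original sparse vector: loaded data followed by the
    * residency code (the last component of the sparse txf result).
    */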
   dests[intrin->num_components - 1] = nir_channel(b, &tex->def, 4);

   nir_def_rewrite_uses(
      &intrin->def,
      nir_vec(b, dests, intrin->num_components));
}

static void
lower_tex_compare(nir_builder *b, nir_tex_instr *tex, int compare_idx)
{
   b->cursor = nir_after_instr(&tex->instr);

   /* Clone the original instruction */
   nir_tex_instr *sparse_tex = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   nir_def_init(&sparse_tex->instr, &sparse_tex->def,
                tex->def.num_components, tex->def.bit_size);
   nir_builder_instr_insert(b, &sparse_tex->instr);

   /* Drop the compare source on the cloned instruction */
   nir_tex_instr_remove_src(sparse_tex, compare_idx);

   /* Drop the residency query on the original tex instruction */
   tex->is_sparse = false;
   tex->def.num_components = tex->def.num_components - 1;

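   /* Recombine the results: shadow-compare data from the original
    * instruction, residency code from the sparse clone.
    */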
   nir_def *new_comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < tex->def.num_components; i++)
      new_comps[i] = nir_channel(b, &tex->def, i);
   new_comps[tex->def.num_components] =
      nir_channel(b, &sparse_tex->def, tex->def.num_components);

   nir_def *new_vec = nir_vec(b, new_comps, sparse_tex->def.num_components);

   nir_def_rewrite_uses_after(&tex->def, new_vec, new_vec->parent_instr);
}

static bool
lower_sparse_intrinsics(nir_builder *b, nir_instr *instr, void *cb_data)
{
   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_image_sparse_load:
      case nir_intrinsic_bindless_image_sparse_load:
         lower_sparse_image_load(b, intrin);
         return true;

      case nir_intrinsic_is_sparse_texels_resident:
         lower_is_sparse_texels_resident(b, intrin);
         return true;

      case nir_intrinsic_sparse_residency_code_and:
         lower_sparse_residency_code_and(b, intrin);
         return true;

      default:
         return false;
      }
   }

   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      int comp_idx = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
      if (comp_idx != -1 && tex->is_sparse) {
         lower_tex_compare(b, tex, comp_idx);
         return true;
      }
      return false;
   }

   default:
      return false;
   }
}

bool
intel_nir_lower_sparse_intrinsics(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_sparse_intrinsics,
                                       nir_metadata_control_flow,
                                       NULL);
}
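
/* Typical driver usage (illustrative sketch; the actual call sites live in
 * the drivers that consume this pass):
 *
 *    bool progress = false;
 *    NIR_PASS(progress, nir, intel_nir_lower_sparse_intrinsics);
 */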