/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

/* This lowers image and texture opcodes to typed buffer opcodes (equivalent to image buffers)
 * for some CDNA chips. Sampler buffers and image buffers are not lowered.
 *
 * Only the subset of opcodes and states that is used by VAAPI and OpenMAX is lowered.
 * That means CLAMP_TO_EDGE is always used. Only level 0 can be accessed. The minification
 * and magnification filter settings are assumed to be equal.
 *
 * This pass relies on a custom image descriptor. The first 4 dwords of the descriptor
 * contain a buffer descriptor whose format matches the image format and whose stride matches
 * the pixel size, and the last 4 dwords contain parameters for manual address computation
 * and bounds checking, such as the pitch and the number of elements per slice.
 */
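/* Layout of the last 4 dwords as consumed by this pass (inferred from the get_field/get_dim
 * accesses below):
 *
 *    dword 4, bits  0..15: width in texels
 *    dword 4, bits 16..31: height in texels
 *    dword 5, bits  0..15: depth, or the number of layers for arrays
 *    dword 5, bits 16..31: first layer of the view
 *    dword 6             : pitch in elements
 *    dword 7             : number of elements per slice
 */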

#include "ac_nir.h"
#include "nir_builder.h"
#include "amdgfxregs.h"

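/* Extract a bitfield from a descriptor dword. For example, get_field(b, desc, 4, 0xffff0000)
 * returns bits 16..31 of dword 4, i.e. the image height.
 */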
static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
{
   return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
}

static unsigned get_coord_components(enum glsl_sampler_dim dim, bool is_array)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_1D:
      return is_array ? 2 : 1;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
      return is_array ? 3 : 2;
   case GLSL_SAMPLER_DIM_3D:
      return 3;
   default:
      unreachable("unexpected sampler type");
   }
}

/* Lower image coordinates to a buffer element index. Return UINT_MAX if the image coordinates
 * are out of bounds.
 */
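/* As a worked example (hypothetical values): for a 2D array image with a pitch of 256
 * elements, 65536 elements per slice, and first_layer = 0, the texel at (x=3, y=2, layer=1)
 * maps to buffer element 3 + 2*256 + 1*65536 = 66051.
 */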
static nir_def *lower_image_coords(nir_builder *b, nir_def *desc, nir_def *coord,
                                   enum glsl_sampler_dim dim, bool is_array,
                                   bool handle_out_of_bounds)
{
   unsigned num_coord_components = get_coord_components(dim, is_array);
   nir_def *zero = nir_imm_int(b, 0);

   if (coord->bit_size == 16)
      coord = nir_u2u32(b, coord);

   /* Get coordinates. */
   nir_def *x = nir_channel(b, coord, 0);
   nir_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
   nir_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;

   if (dim == GLSL_SAMPLER_DIM_1D && is_array) {
      z = y;
      y = NULL;
   }

   if (is_array) {
      nir_def *first_layer = get_field(b, desc, 5, 0xffff0000);
      z = nir_iadd(b, z, first_layer);
   }

   /* Compute the buffer element index. */
   nir_def *index = x;
   if (y) {
      nir_def *pitch = nir_channel(b, desc, 6);
      index = nir_iadd(b, index, nir_imul(b, pitch, y));
   }
   if (z) {
      nir_def *slice_elements = nir_channel(b, desc, 7);
      index = nir_iadd(b, index, nir_imul(b, slice_elements, z));
   }

   /* Determine whether the coordinates are out of bounds. */
   nir_def *out_of_bounds = NULL;

   if (handle_out_of_bounds) {
      nir_def *width = get_field(b, desc, 4, 0xffff);
      out_of_bounds = nir_ior(b, nir_ilt(b, x, zero), nir_ige(b, x, width));

      if (y) {
         nir_def *height = get_field(b, desc, 4, 0xffff0000);
         out_of_bounds = nir_ior(b, out_of_bounds,
                                 nir_ior(b, nir_ilt(b, y, zero), nir_ige(b, y, height)));
      }
      if (z) {
         nir_def *depth = get_field(b, desc, 5, 0xffff);
         out_of_bounds = nir_ior(b, out_of_bounds,
                                 nir_ior(b, nir_ilt(b, z, zero), nir_ige(b, z, depth)));
      }

      /* Make the buffer access out of bounds by setting the index to UINT_MAX. */
      index = nir_bcsel(b, out_of_bounds, nir_imm_int(b, UINT_MAX), index);
   }

   return index;
}

static nir_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
                                    nir_def *desc, nir_def *coord,
                                    enum gl_access_qualifier access, enum glsl_sampler_dim dim,
                                    bool is_array, bool handle_out_of_bounds)
{
   nir_def *zero = nir_imm_int(b, 0);

   return nir_load_buffer_amd(b, num_components, bit_size, nir_channels(b, desc, 0xf),
                              zero, zero,
                              lower_image_coords(b, desc, coord, dim, is_array,
                                                 handle_out_of_bounds),
                              .base = 0,
                              .memory_modes = nir_var_image,
                              .access = access | ACCESS_USES_FORMAT_AMD);
}

static void emulated_image_store(nir_builder *b, nir_def *desc, nir_def *coord,
                                 nir_def *data, enum gl_access_qualifier access,
                                 enum glsl_sampler_dim dim, bool is_array)
{
   nir_def *zero = nir_imm_int(b, 0);

   nir_store_buffer_amd(b, data, nir_channels(b, desc, 0xf), zero, zero,
                        lower_image_coords(b, desc, coord, dim, is_array, true),
                        .base = 0,
                        .memory_modes = nir_var_image,
                        .access = access | ACCESS_USES_FORMAT_AMD);
}

/* Return the width, height, or depth for dim=0,1,2. */
static nir_def *get_dim(nir_builder *b, nir_def *desc, unsigned dim)
{
   return get_field(b, desc, 4 + dim / 2, 0xffff << (16 * (dim % 2)));
}

/* Lower txl with lod=0 to typed buffer loads. This is based on the equations in the GL spec.
 * This basically converts the tex opcode into 1 or more image_load opcodes.
 */
static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
                                        unsigned bit_size, nir_def *desc,
                                        nir_def *sampler_desc, nir_def *coord_vec,
                                        enum glsl_sampler_dim sampler_dim, bool is_array)
{
   const enum gl_access_qualifier access =
      ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER;
   const unsigned num_coord_components = get_coord_components(sampler_dim, is_array);
   const unsigned num_dim_coords = num_coord_components - is_array;
   const unsigned array_comp = num_coord_components - 1;

   nir_def *zero = nir_imm_int(b, 0);
   nir_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
   nir_def *coord[3] = {0};

   if (coord_vec->bit_size == 16)
      coord_vec = nir_f2f32(b, coord_vec);

   assert(num_coord_components <= 3);
   for (unsigned i = 0; i < num_coord_components; i++)
      coord[i] = nir_channel(b, coord_vec, i);

   /* Convert to unnormalized coordinates. */
   if (sampler_dim != GLSL_SAMPLER_DIM_RECT) {
      for (unsigned dim = 0; dim < num_dim_coords; dim++)
         coord[dim] = nir_fmul(b, coord[dim], nir_u2f32(b, get_dim(b, desc, dim)));
   }

   /* The layer index is handled differently and ignores the filter and wrap mode. */
   if (is_array) {
      coord[array_comp] = nir_f2i32(b, nir_fround_even(b, coord[array_comp]));
      coord[array_comp] = nir_iclamp(b, coord[array_comp], zero,
                                     nir_iadd_imm(b, get_dim(b, desc, 2), -1));
   }

   /* Determine the filter by reading the first bit of the XY_MAG_FILTER field,
    * which is 1 for linear, 0 for nearest.
    *
    * We assume that XY_MIN_FILTER and Z_FILTER are identical.
    */
   nir_def *is_nearest =
      nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, sampler_desc, 2), 1 << 20), 0);
   nir_def *result_nearest, *result_linear;

   nir_if *if_nearest = nir_push_if(b, is_nearest);
   {
      /* Nearest filter. */
      nir_def *coord0[3] = {0};
      memcpy(coord0, coord, sizeof(coord));

      for (unsigned dim = 0; dim < num_dim_coords; dim++) {
         /* Convert to integer coordinates. (floor is required) */
         coord0[dim] = nir_f2i32(b, nir_ffloor(b, coord0[dim]));

         /* Apply the wrap mode. We assume it's always CLAMP_TO_EDGE, so clamp. */
         coord0[dim] = nir_iclamp(b, coord0[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
      }

      /* Load the texel. */
      result_nearest = emulated_image_load(b, num_components, bit_size, desc,
                                           nir_vec(b, coord0, num_coord_components),
                                           access, sampler_dim, is_array, false);
   }
   nir_push_else(b, if_nearest);
   {
      /* Linear filter. */
      nir_def *coord0[3] = {0};
      nir_def *coord1[3] = {0};
      nir_def *weight[3] = {0};

      memcpy(coord0, coord, sizeof(coord));

      for (unsigned dim = 0; dim < num_dim_coords; dim++) {
         /* First subtract 0.5. */
         coord0[dim] = nir_fadd_imm(b, coord0[dim], -0.5);

         /* Use fract to compute the filter weights. (FP16 results will get FP16 filter precision) */
         weight[dim] = nir_f2fN(b, nir_ffract(b, coord0[dim]), bit_size);

         /* Floor to get the top-left texel of the filter. */
         /* Add 1 to get the bottom-right texel. */
         coord0[dim] = nir_f2i32(b, nir_ffloor(b, coord0[dim]));
         coord1[dim] = nir_iadd_imm(b, coord0[dim], 1);

         /* Apply the wrap mode. We assume it's always CLAMP_TO_EDGE, so clamp. */
         coord0[dim] = nir_iclamp(b, coord0[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
         coord1[dim] = nir_iclamp(b, coord1[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
      }

      /* Load all texels for the linear filter.
       * This is 2 texels for 1D, 4 texels for 2D, and 8 texels for 3D.
       */
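      /* The final result is standard bi/trilinear interpolation: the weighted sum of all
       * 2^num_dim_coords texels, where texel i contributes the product over dims of
       * ((i >> dim) & 1 ? weight[dim] : 1 - weight[dim]).
       */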
      nir_def *texel[8];

      for (unsigned i = 0; i < (1 << num_dim_coords); i++) {
         nir_def *texel_coord[3];

         /* Determine whether the current texel should use channels from coord0
          * or coord1. The i-th bit of the texel index determines that.
          */
         for (unsigned dim = 0; dim < num_dim_coords; dim++)
            texel_coord[dim] = (i >> dim) & 0x1 ? coord1[dim] : coord0[dim];

         /* Add the layer index, which doesn't change between texels. */
         if (is_array)
            texel_coord[array_comp] = coord0[array_comp];

         /* Compute how much the texel contributes to the final result. */
         nir_def *texel_weight = fp_one;
         for (unsigned dim = 0; dim < num_dim_coords; dim++) {
            /* Let's see what "i" represents:
             *    Texel i=0 = 000
             *    Texel i=1 = 001
             *    Texel i=2 = 010 (2D & 3D only)
             *    Texel i=3 = 011 (2D & 3D only)
             *    Texel i=4 = 100 (3D only)
             *    Texel i=5 = 101 (3D only)
             *    Texel i=6 = 110 (3D only)
             *    Texel i=7 = 111 (3D only)
             *
             * The rightmost bit (LSB) represents the X direction, the middle bit represents
             * the Y direction, and the leftmost bit (MSB) represents the Z direction.
             * If we shift the texel index "i" right by the dimension "dim", the resulting bit
             * tells us whether the texel value should be multiplied by weight[dim] (bit set)
             * or by (1 - weight[dim]) (bit clear).
             */
            texel_weight = nir_fmul(b, texel_weight,
                                    (i >> dim) & 0x1 ? weight[dim] :
                                                       nir_fadd(b, fp_one, nir_fneg(b, weight[dim])));
         }

         /* Load the linear filter texel. */
         texel[i] = emulated_image_load(b, num_components, bit_size, desc,
                                        nir_vec(b, texel_coord, num_coord_components),
                                        access, sampler_dim, is_array, false);

         /* Multiply the texel by the weight. */
         texel[i] = nir_fmul(b, texel[i], texel_weight);
      }

      /* Sum up all weighted texels to get the final result of linear filtering. */
      result_linear = zero;
      for (unsigned i = 0; i < (1 << num_dim_coords); i++)
         result_linear = nir_fadd(b, result_linear, texel[i]);
   }
   nir_pop_if(b, if_nearest);

   return nir_if_phi(b, result_nearest, result_linear);
}

static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      nir_deref_instr *deref;
      enum gl_access_qualifier access;
      enum glsl_sampler_dim dim;
      bool is_array;
      unsigned num_desc_components;
      nir_def *desc = NULL, *result = NULL;
      ASSERTED const char *intr_name;

      nir_def *dst = &intr->def;
      b->cursor = nir_before_instr(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_image_load:
      case nir_intrinsic_image_store:
         access = nir_intrinsic_access(intr);
         dim = nir_intrinsic_image_dim(intr);
         if (dim == GLSL_SAMPLER_DIM_BUF)
            return false;
         is_array = nir_intrinsic_image_array(intr);
         num_desc_components = dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

         if (intr->src[0].ssa->bit_size == 32 &&
             intr->src[0].ssa->num_components == num_desc_components)
            desc = intr->src[0].ssa;
         else
            desc = nir_image_descriptor_amd(b, num_desc_components,
                                            32, intr->src[0].ssa);
         break;

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
         deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
         access = nir_deref_instr_get_variable(deref)->data.access;
         dim = glsl_get_sampler_dim(deref->type);
         if (dim == GLSL_SAMPLER_DIM_BUF)
            return false;
         is_array = glsl_sampler_type_is_array(deref->type);
         num_desc_components = dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

         if (intr->src[0].ssa->bit_size == 32 &&
             intr->src[0].ssa->num_components == num_desc_components)
            desc = intr->src[0].ssa;
         else
            desc = nir_image_deref_descriptor_amd(b, num_desc_components,
                                                  32, intr->src[0].ssa);
         break;

      case nir_intrinsic_bindless_image_load:
      case nir_intrinsic_bindless_image_store:
         access = nir_intrinsic_access(intr);
         dim = nir_intrinsic_image_dim(intr);
         if (dim == GLSL_SAMPLER_DIM_BUF)
            return false;
         is_array = nir_intrinsic_image_array(intr);
         num_desc_components = dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

         if (intr->src[0].ssa->bit_size == 32 &&
             intr->src[0].ssa->num_components == num_desc_components)
            desc = intr->src[0].ssa;
         else
            desc = nir_bindless_image_descriptor_amd(b, num_desc_components,
                                                     32, intr->src[0].ssa);
         break;

      /* These don't need any lowering. */
      case nir_intrinsic_image_descriptor_amd:
      case nir_intrinsic_image_deref_descriptor_amd:
      case nir_intrinsic_bindless_image_descriptor_amd:
         return false;

      default:
         intr_name = nir_intrinsic_infos[intr->intrinsic].name;

         /* No other intrinsics are expected from VAAPI and OpenMAX.
          * (this lowering is only used by CDNA, which only uses those frontends)
          */
         if (strstr(intr_name, "image") == intr_name ||
             strstr(intr_name, "bindless_image") == intr_name) {
            fprintf(stderr, "Unexpected image opcode: ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\nAborting to prevent a hang.");
            abort();
         }
         return false;
      }

      switch (intr->intrinsic) {
      case nir_intrinsic_image_load:
      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_bindless_image_load:
         result = emulated_image_load(b, intr->def.num_components, intr->def.bit_size,
                                      desc, intr->src[1].ssa, access, dim, is_array, true);
         nir_def_rewrite_uses_after(dst, result, instr);
         nir_instr_remove(instr);
         return true;

      case nir_intrinsic_image_store:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_bindless_image_store:
         emulated_image_store(b, desc, intr->src[1].ssa, intr->src[3].ssa, access, dim, is_array);
         nir_instr_remove(instr);
         return true;

      default:
         unreachable("shouldn't get here");
      }
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      nir_tex_instr *new_tex;
      nir_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;

      nir_def *dst = &tex->def;
      b->cursor = nir_before_instr(instr);

      switch (tex->op) {
      case nir_texop_tex:
      case nir_texop_txl:
      case nir_texop_txf:
         for (unsigned i = 0; i < tex->num_srcs; i++) {
            switch (tex->src[i].src_type) {
            case nir_tex_src_texture_deref:
            case nir_tex_src_texture_handle:
               if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
                  return false;
               new_tex = nir_tex_instr_create(b->shader, 1);
               new_tex->op = nir_texop_descriptor_amd;
               new_tex->sampler_dim = tex->sampler_dim;
               new_tex->is_array = tex->is_array;
               new_tex->texture_index = tex->texture_index;
               new_tex->sampler_index = tex->sampler_index;
               new_tex->dest_type = nir_type_int32;
               new_tex->src[0].src = nir_src_for_ssa(tex->src[i].src.ssa);
               new_tex->src[0].src_type = tex->src[i].src_type;
               nir_def_init(&new_tex->instr, &new_tex->def,
                            nir_tex_instr_dest_size(new_tex), 32);
               nir_builder_instr_insert(b, &new_tex->instr);
               desc = &new_tex->def;
               break;

            case nir_tex_src_sampler_deref:
            case nir_tex_src_sampler_handle:
               if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
                  return false;
               new_tex = nir_tex_instr_create(b->shader, 1);
               new_tex->op = nir_texop_sampler_descriptor_amd;
               new_tex->sampler_dim = tex->sampler_dim;
               new_tex->is_array = tex->is_array;
               new_tex->texture_index = tex->texture_index;
               new_tex->sampler_index = tex->sampler_index;
               new_tex->dest_type = nir_type_int32;
               new_tex->src[0].src = nir_src_for_ssa(tex->src[i].src.ssa);
               new_tex->src[0].src_type = tex->src[i].src_type;
               nir_def_init(&new_tex->instr, &new_tex->def,
                            nir_tex_instr_dest_size(new_tex), 32);
               nir_builder_instr_insert(b, &new_tex->instr);
               sampler_desc = &new_tex->def;
               break;

            case nir_tex_src_coord:
               coord = tex->src[i].src.ssa;
               break;

            case nir_tex_src_projector:
            case nir_tex_src_comparator:
            case nir_tex_src_offset:
            case nir_tex_src_texture_offset:
            case nir_tex_src_sampler_offset:
            case nir_tex_src_plane:
               unreachable("unsupported texture src");

            default:;
            }
         }

         switch (tex->op) {
         case nir_texop_txf:
            result = emulated_image_load(b, tex->def.num_components, tex->def.bit_size,
                                         desc, coord,
                                         ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
                                         tex->sampler_dim, tex->is_array, true);
            nir_def_rewrite_uses_after(dst, result, instr);
            nir_instr_remove(instr);
            return true;

         case nir_texop_tex:
         case nir_texop_txl:
            result = emulated_tex_level_zero(b, tex->def.num_components, tex->def.bit_size,
                                             desc, sampler_desc, coord, tex->sampler_dim, tex->is_array);
            nir_def_rewrite_uses_after(dst, result, instr);
            nir_instr_remove(instr);
            return true;

         default:
            unreachable("shouldn't get here");
         }
         break;

      case nir_texop_descriptor_amd:
      case nir_texop_sampler_descriptor_amd:
         return false;

      default:
         fprintf(stderr, "Unexpected texture opcode: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\nAborting to prevent a hang.");
         abort();
      }
   }

   return false;
}

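/* Pass entry point. A minimal sketch of a hypothetical call site (driver code outside this
 * file): run the pass and clean up afterwards if it reported progress, e.g.
 *
 *    if (ac_nir_lower_image_opcodes(nir))
 *       nir_opt_dce(nir);
 */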
bool ac_nir_lower_image_opcodes(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_image_opcodes,
                                       nir_metadata_control_flow,
                                       NULL);
}
519