/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl/isl.h"

#include "elk_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

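/** Load one field of the ISL image parameter block associated with the
 * image.  The intrinsic's base is the dword index of the field
 * (i.e. offset / 4) and the number of components loaded depends on which
 * field is requested.
 */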
static nir_def *
_load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_image_deref_load_param_intel);
   load->src[0] = nir_src_for_ssa(&deref->def);
   nir_intrinsic_set_base(load, offset / 4);

   switch (offset) {
   case ISL_IMAGE_PARAM_OFFSET_OFFSET:
   case ISL_IMAGE_PARAM_SWIZZLING_OFFSET:
      load->num_components = 2;
      break;
   case ISL_IMAGE_PARAM_TILING_OFFSET:
   case ISL_IMAGE_PARAM_SIZE_OFFSET:
      load->num_components = 3;
      break;
   case ISL_IMAGE_PARAM_STRIDE_OFFSET:
      load->num_components = 4;
      break;
   default:
      unreachable("Invalid param offset");
   }
   nir_def_init(&load->instr, &load->def, load->num_components, 32);

   nir_builder_instr_insert(b, &load->instr);
   return &load->def;
}

#define load_image_param(b, d, o) \
   _load_image_param(b, d, ISL_IMAGE_PARAM_##o##_OFFSET)
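/* For example, load_image_param(b, deref, SIZE) expands to
 * _load_image_param(b, deref, ISL_IMAGE_PARAM_SIZE_OFFSET).
 */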
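/** Build a boolean that is true iff each component of \p coord is (signed)
 * less than the corresponding component of the image size read from the
 * param block.
 */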
static nir_def *
image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
                         nir_def *coord)
{
   nir_def *size = load_image_param(b, deref, SIZE);
   nir_def *cmp = nir_ilt(b, coord, size);

   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   nir_def *in_bounds = nir_imm_true(b);
   for (unsigned i = 0; i < coord_comps; i++)
      in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));

   return in_bounds;
}

/** Calculate the offset in memory of the texel given by \p coord.
 *
 * This is meant to be used with untyped surface messages to access a tiled
 * surface, which involves manually taking the tiling and swizzling modes of
 * the surface into account, so it will hopefully not happen very often.
 *
 * The tiling algorithm implemented here matches either the X or Y tiling
 * layouts supported by the hardware depending on the tiling coefficients
 * passed to the program as uniforms.  See Volume 1 Part 2 Section 4.5
 * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
 * the hardware tiling format.
 */
static nir_def *
image_address(nir_builder *b, const struct intel_device_info *devinfo,
              nir_deref_instr *deref, nir_def *coord)
{
   if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
       glsl_sampler_type_is_array(deref->type)) {
      /* It's easier if 1D arrays are treated like 2D arrays */
      coord = nir_vec3(b, nir_channel(b, coord, 0),
                          nir_imm_int(b, 0),
                          nir_channel(b, coord, 1));
   } else {
      unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
      coord = nir_trim_vector(b, coord, dims);
   }

   nir_def *offset = load_image_param(b, deref, OFFSET);
   nir_def *tiling = load_image_param(b, deref, TILING);
   nir_def *stride = load_image_param(b, deref, STRIDE);

   /* Shift the coordinates by the fixed surface offset.  It may be non-zero
    * if the image is a single slice of a higher-dimensional surface, or if a
    * non-zero mipmap level of the surface is bound to the pipeline.  The
    * offset needs to be applied here rather than at surface state set-up time
    * because the desired slice-level may start mid-tile, so simply shifting
    * the surface base address wouldn't give a well-formed tiled surface in
    * the general case.
    */
   nir_def *xypos = (coord->num_components == 1) ?
                        nir_vec2(b, coord, nir_imm_int(b, 0)) :
                        nir_trim_vector(b, coord, 2);
   xypos = nir_iadd(b, xypos, offset);

   /* The layout of 3-D textures in memory is sort-of like a tiling
    * format.  At each miplevel, the slices are arranged in rows of
    * 2^level slices per row.  The slice row is stored in tmp.y and
    * the slice within the row is stored in tmp.x.
    *
    * The layout of 2-D array textures and cubemaps is much simpler:
    * Depending on whether the ARYSPC_LOD0 layout is in use it will be
    * stored in memory as an array of slices, each one being a 2-D
    * arrangement of miplevels, or as a 2-D arrangement of miplevels,
    * each one being an array of slices.  In either case the separation
    * between slices of the same LOD is equal to the qpitch value
    * provided as stride.w.
    *
    * This code can be made to handle both 2-D array textures and 3-D
    * textures by passing in the miplevel as tile.z for 3-D textures and
    * 0 in tile.z for 2-D array textures.
    *
    * See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
    * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
    * of the hardware 3D texture and 2D array layouts.
    */
   if (coord->num_components > 2) {
      /* Decompose z into a major (tmp.y) and a minor (tmp.x)
       * index.
       */
      nir_def *z = nir_channel(b, coord, 2);
      nir_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
                                    nir_channel(b, tiling, 2));
      nir_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));

      /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
       * slice offset.
       */
      xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
                                             nir_channels(b, stride, 0xc)));
   }

   nir_def *addr;
   if (coord->num_components > 1) {
      /* Calculate the major/minor x and y indices.  In order to
       * accommodate both X and Y tiling, the Y-major tiling format is
       * treated as being a bunch of narrow X-tiles placed next to each
       * other.  This means that the tile width for Y-tiling is actually
       * the width of one sub-column of the Y-major tile where each 4K
       * tile has 8 512B sub-columns.
       *
       * The major Y value is the row of tiles in which the pixel lives.
       * The major X value is the tile sub-column in which the pixel
       * lives; for X tiling, this is the same as the tile column, for Y
       * tiling, each tile has 8 sub-columns.  The minor X and Y indices
       * are the position within the sub-column.
       */
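      /* For illustration (the tiling coefficients come from the driver, so
       * the concrete values here are an assumption): for an X-tiled surface
       * with 4B texels, a 512B x 8-row tile covers 128x8 texels, so
       * tiling.xy would presumably be (7, 3); minor is then the position
       * inside the (sub-)tile and major = xypos >> tiling.xy selects the
       * tile sub-column and row.
       */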

      /* Calculate the minor x and y indices. */
      nir_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
                                   nir_trim_vector(b, tiling, 2));
      nir_def *major = nir_ushr(b, xypos, nir_trim_vector(b, tiling, 2));

      /* Calculate the texel index from the start of the tile row and the
       * vertical coordinate of the row.
       * Equivalent to:
       *   tmp.x = (major.x << tile.y << tile.x) +
       *           (minor.y << tile.x) + minor.x
       *   tmp.y = major.y << tile.y
       */
      nir_def *idx_x, *idx_y;
      idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
      idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
      idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));

      /* Add it to the start of the tile row. */
      nir_def *idx;
      idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
      idx = nir_iadd(b, idx, idx_x);

      /* Multiply by the Bpp value. */
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));

      if (devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT) {
         /* Take into account the two dynamically specified shifts.  Both are
          * used to implement swizzling of X-tiled surfaces.  For Y-tiled
          * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
          * address, so a swz value of 0xff (actually interpreted as 31 by the
          * hardware) will be provided to cause the relevant bit of tmp.y to
          * be zero and turn the first XOR into the identity.  For linear
          * surfaces or platforms lacking address swizzling both shifts will
          * be 0xff, causing the relevant bits of both tmp.x and .y to be
          * zero, which effectively disables swizzling.
          */
         nir_def *swizzle = load_image_param(b, deref, SWIZZLING);
         nir_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
         nir_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));

         /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
         nir_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
                                    nir_imm_int(b, 1 << 6));
         addr = nir_ixor(b, addr, bit);
      }
   } else {
      /* Multiply by the Bpp/stride value.  Note that the addr.y may be
       * non-zero even if the image is one-dimensional because a vertical
       * offset may have been applied above to select a non-zero slice or
       * level of a higher-dimensional texture.
       */
      nir_def *idx;
      idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
      idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));
   }

   return addr;
}

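/* Per-format information used by the load/store color conversion helpers
 * below: the isl format layout plus the channel count and per-channel bit
 * widths.
 */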
struct format_info {
   const struct isl_format_layout *fmtl;
   unsigned chans;
   unsigned bits[4];
};

static struct format_info
get_format_info(enum isl_format fmt)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return (struct format_info) {
      .fmtl = fmtl,
      .chans = isl_format_get_num_channels(fmt),
      .bits = {
         fmtl->channels.r.bits,
         fmtl->channels.g.bits,
         fmtl->channels.b.bits,
         fmtl->channels.a.bits
      },
   };
}

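/** Convert a value that was loaded as \p lower_fmt back to what a load of
 * \p image_fmt should have produced, then widen the result to
 * \p dest_components components, filling missing channels with zeros and a
 * one in the alpha slot.
 */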
static nir_def *
convert_color_for_load(nir_builder *b, const struct intel_device_info *devinfo,
                       nir_def *color,
                       enum isl_format image_fmt, enum isl_format lower_fmt,
                       unsigned dest_components)
{
   if (image_fmt == lower_fmt)
      goto expand_vec;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      color = nir_format_unpack_11f11f10f(b, color);
      goto expand_vec;
   }

   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   const bool needs_sign_extension =
      isl_format_has_snorm_channel(image_fmt) ||
      isl_format_has_sint_channel(image_fmt);

   /* We only check the red channel to detect if we need to pack/unpack */
   assert(image.bits[0] != lower.bits[0] ||
          memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      if (needs_sign_extension)
         color = nir_format_unpack_sint(b, color, image.bits, image.chans);
      else
         color = nir_format_unpack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      /* On IVB, we rely on the undocumented behavior that typed reads from
       * surfaces of the unsupported R8 and R16 formats return useful data in
       * their least significant bits.  However, the data in the high bits is
       * garbage so we have to discard it.
       */
      if (devinfo->verx10 == 70 &&
          (lower_fmt == ISL_FORMAT_R16_UINT ||
           lower_fmt == ISL_FORMAT_R8_UINT))
         color = nir_format_mask_uvec(b, color, lower.bits);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
                                                  image.bits[0]);
      }

      if (needs_sign_extension)
         color = nir_format_sign_extend_ivec(b, color, image.bits);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_unorm_to_float(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_snorm_to_float(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_unpack_half_2x16_split_x(b, color);
      break;

   case ISL_UINT:
   case ISL_SINT:
      break;

   default:
      unreachable("Invalid image channel type");
   }

expand_vec:
   assert(dest_components == 1 || dest_components == 4);
   assert(color->num_components <= dest_components);
   if (color->num_components == dest_components)
      return color;

   nir_def *comps[4];
   for (unsigned i = 0; i < color->num_components; i++)
      comps[i] = nir_channel(b, color, i);

   for (unsigned i = color->num_components; i < 3; i++)
      comps[i] = nir_imm_int(b, 0);

   if (color->num_components < 4) {
      if (isl_format_has_int_channel(image_fmt))
         comps[3] = nir_imm_int(b, 1);
      else
         comps[3] = nir_imm_float(b, 1);
   }

   return nir_vec(b, comps, dest_components);
}

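/** Lower an image load.  When the hardware has a matching typed format, the
 * load is kept in that format and its result converted in place; otherwise
 * it is replaced by a bounds-checked raw untyped read of the tiled surface
 * followed by the format conversion.
 */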
static bool
lower_image_load_instr(nir_builder *b,
                       const struct intel_device_info *devinfo,
                       nir_intrinsic_instr *intrin,
                       bool sparse)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);
      const unsigned dest_components =
         sparse ? (intrin->num_components - 1) : intrin->num_components;

      /* Use an undef to hold the uses of the load while we do the color
       * conversion.
       */
      nir_def *placeholder = nir_undef(b, 4, 32);
      nir_def_rewrite_uses(&intrin->def, placeholder);

      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      intrin->def.num_components = intrin->num_components;

      b->cursor = nir_after_instr(&intrin->instr);

      nir_def *color = convert_color_for_load(b, devinfo,
                                              &intrin->def,
                                              image_fmt, lower_fmt,
                                              dest_components);

      if (sparse) {
         /* Put the sparse component back on the original instruction */
         intrin->num_components++;
         intrin->def.num_components = intrin->num_components;

         /* Carry over the sparse component without modifying it with the
          * converted color.
          */
         nir_def *sparse_color[NIR_MAX_VEC_COMPONENTS];
         for (unsigned i = 0; i < dest_components; i++)
            sparse_color[i] = nir_channel(b, color, i);
         sparse_color[dest_components] =
            nir_channel(b, &intrin->def, intrin->num_components - 1);
         color = nir_vec(b, sparse_color, dest_components + 1);
      }

      nir_def_rewrite_uses(placeholder, color);
      nir_instr_remove(placeholder->parent_instr);
   } else {
      /* This code path is only useful prior to Gfx9; we do not have plans
       * to enable sparse there.
       */
      assert(!sparse);

      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;
      const unsigned dest_components = intrin->num_components;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_def *coord = intrin->src[1].ssa;

      nir_def *do_load = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->verx10 == 70) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gfx7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_def *stride = load_image_param(b, deref, STRIDE);
         nir_def *is_raw =
            nir_igt_imm(b, nir_channel(b, stride, 0), 4);
         do_load = nir_iand(b, do_load, is_raw);
      }
      nir_push_if(b, do_load);

      nir_def *addr = image_address(b, devinfo, deref, coord);
      nir_def *load =
         nir_image_deref_load_raw_intel(b, image_fmtl->bpb / 32, 32,
                                        &deref->def, addr);

      nir_push_else(b, NULL);

      nir_def *zero = nir_imm_zero(b, load->num_components, 32);

      nir_pop_if(b, NULL);

      nir_def *value = nir_if_phi(b, load, zero);

      nir_def *color = convert_color_for_load(b, devinfo, value,
                                              image_fmt, raw_fmt,
                                              dest_components);

      nir_def_rewrite_uses(&intrin->def, color);
   }

   return true;
}

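/** Convert the source color of an image store from the representation of
 * \p image_fmt to the bit pattern that must be written out in \p lower_fmt.
 */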
static nir_def *
convert_color_for_store(nir_builder *b, const struct intel_device_info *devinfo,
                        nir_def *color,
                        enum isl_format image_fmt, enum isl_format lower_fmt)
{
   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   color = nir_trim_vector(b, color, image.chans);

   if (image_fmt == lower_fmt)
      return color;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      return nir_format_pack_11f11f10f(b, color);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_unorm(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_snorm(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_format_float_to_half(b, color);
      break;

   case ISL_UINT:
      color = nir_format_clamp_uint(b, color, image.bits);
      break;

   case ISL_SINT:
      color = nir_format_clamp_sint(b, color, image.bits);
      break;

   default:
      unreachable("Invalid image channel type");
   }

   if (image.bits[0] < 32 &&
       (isl_format_has_snorm_channel(image_fmt) ||
        isl_format_has_sint_channel(image_fmt)))
      color = nir_format_mask_uvec(b, color, image.bits);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      color = nir_format_pack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
                                                  lower.bits[0]);
      }
   }

   return color;
}

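/** Lower an image store the same way as loads: convert the source color and
 * store in the matching typed format when one exists, otherwise convert to
 * the raw format and emit a bounds-checked raw untyped write.
 */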
static bool
lower_image_store_instr(nir_builder *b,
                        const struct intel_device_info *devinfo,
                        nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only surfaces, we trust that the hardware can just do the
    * conversion for us.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);

      /* Color conversion goes before the store */
      b->cursor = nir_before_instr(&intrin->instr);

      nir_def *color = convert_color_for_store(b, devinfo,
                                               intrin->src[3].ssa,
                                               image_fmt, lower_fmt);
      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      nir_src_rewrite(&intrin->src[3], color);
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_def *coord = intrin->src[1].ssa;

      nir_def *do_store = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->verx10 == 70) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gfx7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_def *stride = load_image_param(b, deref, STRIDE);
         nir_def *is_raw =
            nir_igt_imm(b, nir_channel(b, stride, 0), 4);
         do_store = nir_iand(b, do_store, is_raw);
      }
      nir_push_if(b, do_store);

      nir_def *addr = image_address(b, devinfo, deref, coord);
      nir_def *color = convert_color_for_store(b, devinfo,
                                               intrin->src[3].ssa,
                                               image_fmt, raw_fmt);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_store_raw_intel);
      store->src[0] = nir_src_for_ssa(&deref->def);
      store->src[1] = nir_src_for_ssa(addr);
      store->src[2] = nir_src_for_ssa(color);
      store->num_components = image_fmtl->bpb / 32;
      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   }

   return true;
}

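/** Wrap image atomics on platforms before Haswell (verx10 < 75) in a check
 * that an image is actually bound, since typed atomics there don't respect
 * null surfaces (see the comment on the size check below).
 */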
static bool
lower_image_atomic_instr(nir_builder *b,
                         const struct intel_device_info *devinfo,
                         nir_intrinsic_instr *intrin)
{
   if (devinfo->verx10 >= 75)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* Use an undef to hold the uses of the atomic while we re-insert it. */
   nir_def *placeholder = nir_undef(b, 4, 32);
   nir_def_rewrite_uses(&intrin->def, placeholder);

   /* Check the first component of the size field to find out if the
    * image is bound.  Necessary on IVB for typed atomics because
    * they don't seem to respect null surfaces and will happily
    * corrupt or read random memory when no image is bound.
    */
   nir_def *size = load_image_param(b, deref, SIZE);
   nir_def *zero = nir_imm_int(b, 0);
   nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));

   nir_builder_instr_insert(b, &intrin->instr);

   nir_pop_if(b, NULL);

   nir_def *result = nir_if_phi(b, &intrin->def, zero);
   nir_def_rewrite_uses(placeholder, result);

   return true;
}

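/** Lower image size queries for images without an actual surface by reading
 * the size field of the param block and padding the remaining destination
 * components with 1.
 */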
static bool
lower_image_size_instr(nir_builder *b,
                       const struct intel_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only images, we have an actual image surface so we fall back
    * and let the back-end emit a TXS for this.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   /* If we have a matching typed format, then we have an actual image surface
    * so we fall back and let the back-end emit a TXS for this.
    */
   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);
   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
      return false;

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *size = load_image_param(b, deref, SIZE);

   nir_def *comps[4] = { NULL, NULL, NULL, NULL };

   assert(nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_CUBE);
   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   for (unsigned c = 0; c < coord_comps; c++)
      comps[c] = nir_channel(b, size, c);

   for (unsigned c = coord_comps; c < intrin->def.num_components; ++c)
      comps[c] = nir_imm_int(b, 1);

   nir_def *vec = nir_vec(b, comps, intrin->def.num_components);
   nir_def_rewrite_uses(&intrin->def, vec);

   return true;
}

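/* nir_shader_instructions_pass callback: route each storage image intrinsic
 * to the matching lowering helper, subject to the pass options.
 */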
static bool
elk_nir_lower_storage_image_instr(nir_builder *b,
                                  nir_instr *instr,
                                  void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   const struct elk_nir_lower_storage_image_opts *opts = cb_data;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_deref_load:
      if (opts->lower_loads)
         return lower_image_load_instr(b, opts->devinfo, intrin, false);
      return false;

   case nir_intrinsic_image_deref_sparse_load:
      if (opts->lower_loads)
         return lower_image_load_instr(b, opts->devinfo, intrin, true);
      return false;

   case nir_intrinsic_image_deref_store:
      if (opts->lower_stores)
         return lower_image_store_instr(b, opts->devinfo, intrin);
      return false;

   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
      if (opts->lower_atomics)
         return lower_image_atomic_instr(b, opts->devinfo, intrin);
      return false;

   case nir_intrinsic_image_deref_size:
      if (opts->lower_get_size)
         return lower_image_size_instr(b, opts->devinfo, intrin);
      return false;

   default:
      /* Nothing to do */
      return false;
   }
}

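/* Main entry point: runs nir_lower_image for the generic lowerings this
 * pass relies on, then the per-instruction lowering above.
 *
 * A minimal usage sketch (a hypothetical call site; which lowerings a
 * driver enables is an assumption here and would depend on the hardware
 * generation):
 *
 *    const struct elk_nir_lower_storage_image_opts opts = {
 *       .devinfo = devinfo,
 *       .lower_loads = true,
 *       .lower_stores = true,
 *       .lower_atomics = true,
 *       .lower_get_size = true,
 *    };
 *    NIR_PASS(progress, nir, elk_nir_lower_storage_image, &opts);
 */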
bool
elk_nir_lower_storage_image(nir_shader *shader,
                            const struct elk_nir_lower_storage_image_opts *opts)
{
   bool progress = false;

   const nir_lower_image_options image_options = {
      .lower_cube_size = true,
      .lower_image_samples_to_one = true,
   };

   progress |= nir_lower_image(shader, &image_options);

   progress |= nir_shader_instructions_pass(shader,
                                            elk_nir_lower_storage_image_instr,
                                            nir_metadata_none,
                                            (void *)opts);

   return progress;
}