/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl/isl.h"

#include "elk_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

static nir_def *
_load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_image_deref_load_param_intel);
   load->src[0] = nir_src_for_ssa(&deref->def);
   nir_intrinsic_set_base(load, offset / 4);

   switch (offset) {
   case ISL_IMAGE_PARAM_OFFSET_OFFSET:
   case ISL_IMAGE_PARAM_SWIZZLING_OFFSET:
      load->num_components = 2;
      break;
   case ISL_IMAGE_PARAM_TILING_OFFSET:
   case ISL_IMAGE_PARAM_SIZE_OFFSET:
      load->num_components = 3;
      break;
   case ISL_IMAGE_PARAM_STRIDE_OFFSET:
      load->num_components = 4;
      break;
   default:
      unreachable("Invalid param offset");
   }
   nir_def_init(&load->instr, &load->def, load->num_components, 32);

   nir_builder_instr_insert(b, &load->instr);
   return &load->def;
}

#define load_image_param(b, d, o) \
   _load_image_param(b, d, ISL_IMAGE_PARAM_##o##_OFFSET)
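/* For example, load_image_param(b, deref, SIZE) expands to
 * _load_image_param(b, deref, ISL_IMAGE_PARAM_SIZE_OFFSET) and, per the
 * switch above, yields the 3-component size vector for the image.
 */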

static nir_def *
image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
                         nir_def *coord)
{
   nir_def *size = load_image_param(b, deref, SIZE);
   nir_def *cmp = nir_ilt(b, coord, size);

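   /* For a 2-D image, for instance, the reduction below amounts to
    * (x < size.x) && (y < size.y), using signed comparisons.
    */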
   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   nir_def *in_bounds = nir_imm_true(b);
   for (unsigned i = 0; i < coord_comps; i++)
      in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));

   return in_bounds;
}

/** Calculate the offset in memory of the texel given by \p coord.
 *
 * This is meant to be used with untyped surface messages to access a tiled
 * surface, which involves manually taking into account the tiling and
 * swizzling modes of the surface, so it will hopefully not happen very often.
 *
 * The tiling algorithm implemented here matches either the X or Y tiling
 * layouts supported by the hardware depending on the tiling coefficients
 * passed to the program as uniforms. See Volume 1 Part 2 Section 4.5
 * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
 * the hardware tiling format.
 */
static nir_def *
image_address(nir_builder *b, const struct intel_device_info *devinfo,
              nir_deref_instr *deref, nir_def *coord)
{
   if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
       glsl_sampler_type_is_array(deref->type)) {
      /* It's easier if 1D arrays are treated like 2D arrays */
      coord = nir_vec3(b, nir_channel(b, coord, 0),
                       nir_imm_int(b, 0),
                       nir_channel(b, coord, 1));
   } else {
      unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
      coord = nir_trim_vector(b, coord, dims);
   }

   nir_def *offset = load_image_param(b, deref, OFFSET);
   nir_def *tiling = load_image_param(b, deref, TILING);
   nir_def *stride = load_image_param(b, deref, STRIDE);

   /* Shift the coordinates by the fixed surface offset. It may be non-zero
    * if the image is a single slice of a higher-dimensional surface, or if a
    * non-zero mipmap level of the surface is bound to the pipeline. The
    * offset needs to be applied here rather than at surface state set-up time
    * because the desired slice-level may start mid-tile, so simply shifting
    * the surface base address wouldn't give a well-formed tiled surface in
    * the general case.
    */
   nir_def *xypos = (coord->num_components == 1) ?
                    nir_vec2(b, coord, nir_imm_int(b, 0)) :
                    nir_trim_vector(b, coord, 2);
   xypos = nir_iadd(b, xypos, offset);

   /* The layout of 3-D textures in memory is sort-of like a tiling
    * format. At each miplevel, the slices are arranged in rows of
    * 2^level slices per row. The slice row is stored in tmp.y and
    * the slice within the row is stored in tmp.x.
    *
    * The layout of 2-D array textures and cubemaps is much simpler:
    * Depending on whether the ARYSPC_LOD0 layout is in use it will be
    * stored in memory as an array of slices, each one being a 2-D
    * arrangement of miplevels, or as a 2-D arrangement of miplevels,
    * each one being an array of slices. In either case the separation
    * between slices of the same LOD is equal to the qpitch value
    * provided as stride.w.
    *
    * This code can be made to handle both 2-D arrays and 3-D textures
    * by passing in the miplevel as tile.z for 3-D textures and 0 in
    * tile.z for 2-D array textures.
    *
    * See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
    * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
    * of the hardware 3D texture and 2D array layouts.
    */
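   /* Worked example with hypothetical values: at miplevel 3 of a 3-D
    * texture, tile.z == 3 (eight slices per row), so slice z == 13 splits
    * into a minor index of 13 & 7 == 5 within the slice row and a major
    * index of 13 >> 3 == 1 slice rows, matching the ubfe/ushr pair below.
    */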
   if (coord->num_components > 2) {
      /* Decompose z into a major (tmp.y) and a minor (tmp.x)
       * index.
       */
      nir_def *z = nir_channel(b, coord, 2);
      nir_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
                              nir_channel(b, tiling, 2));
      nir_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));

      /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
       * slice offset.
       */
      xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
                                          nir_channels(b, stride, 0xc)));
   }

   nir_def *addr;
   if (coord->num_components > 1) {
      /* Calculate the major/minor x and y indices. In order to
       * accommodate both X and Y tiling, the Y-major tiling format is
       * treated as being a bunch of narrow X-tiles placed next to each
       * other. This means that the tile width for Y-tiling is actually
       * the width of one sub-column of the Y-major tile where each 4K
       * tile has 8 512B sub-columns.
       *
       * The major Y value is the row of tiles in which the pixel lives.
       * The major X value is the tile sub-column in which the pixel
       * lives; for X tiling, this is the same as the tile column, for Y
       * tiling, each tile has 8 sub-columns. The minor X and Y indices
       * are the position within the sub-column.
       */

      /* Calculate the minor x and y indices. */
      nir_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
                                nir_trim_vector(b, tiling, 2));
      nir_def *major = nir_ushr(b, xypos, nir_trim_vector(b, tiling, 2));

      /* Calculate the texel index from the start of the tile row and the
       * vertical coordinate of the row.
       * Equivalent to:
       *   tmp.x = (major.x << tile.y << tile.x) +
       *           (minor.y << tile.x) + minor.x
       *   tmp.y = major.y << tile.y
       */
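      /* For instance, with hypothetical tiling == (3, 2) (8x4-texel
       * sub-columns) and xypos == (19, 7): minor == (3, 3), major == (2, 1),
       * so idx_x == (((2 << 2) + 3) << 3) + 3 == 91 and idx_y == 1 << 2 == 4.
       */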
      nir_def *idx_x, *idx_y;
      idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
      idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
      idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));

      /* Add it to the start of the tile row. */
      nir_def *idx;
      idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
      idx = nir_iadd(b, idx, idx_x);

      /* Multiply by the Bpp value. */
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));

      if (devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT) {
         /* Take into account the two dynamically specified shifts. Both are
          * used to implement swizzling of X-tiled surfaces. For Y-tiled
          * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
          * address, so a swz value of 0xff (actually interpreted as 31 by the
          * hardware) will be provided to cause the relevant bit of tmp.y to
          * be zero and turn the first XOR into the identity. For linear
          * surfaces or platforms lacking address swizzling both shifts will
          * be 0xff, causing the relevant bits of both tmp.x and tmp.y to be
          * zero, which effectively disables swizzling.
          */
         nir_def *swizzle = load_image_param(b, deref, SWIZZLING);
         nir_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
         nir_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));

         /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
         nir_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
                                 nir_imm_int(b, 1 << 6));
         addr = nir_ixor(b, addr, bit);
      }
   } else {
      /* Multiply by the Bpp/stride value. Note that addr.y may be
       * non-zero even if the image is one-dimensional because a vertical
       * offset may have been applied above to select a non-zero slice or
       * level of a higher-dimensional texture.
       */
      nir_def *idx;
      idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
      idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));
   }

   return addr;
}

struct format_info {
   const struct isl_format_layout *fmtl;
   unsigned chans;
   unsigned bits[4];
};

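/* For example, get_format_info(ISL_FORMAT_R10G10B10A2_UNORM) below yields
 * chans == 4 and bits == {10, 10, 10, 2}.
 */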
static struct format_info
get_format_info(enum isl_format fmt)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return (struct format_info) {
      .fmtl = fmtl,
      .chans = isl_format_get_num_channels(fmt),
      .bits = {
         fmtl->channels.r.bits,
         fmtl->channels.g.bits,
         fmtl->channels.b.bits,
         fmtl->channels.a.bits
      },
   };
}

static nir_def *
convert_color_for_load(nir_builder *b, const struct intel_device_info *devinfo,
                       nir_def *color,
                       enum isl_format image_fmt, enum isl_format lower_fmt,
                       unsigned dest_components)
{
   if (image_fmt == lower_fmt)
      goto expand_vec;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      color = nir_format_unpack_11f11f10f(b, color);
      goto expand_vec;
   }

   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   const bool needs_sign_extension =
      isl_format_has_snorm_channel(image_fmt) ||
      isl_format_has_sint_channel(image_fmt);

   /* We only check the red channel to detect if we need to pack/unpack */
   assert(image.bits[0] != lower.bits[0] ||
          memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);

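   /* Illustrative example (assuming the device lowers RGBA8 formats to
    * R32_UINT): an ISL_FORMAT_R8G8B8A8_UNORM image read back as a single
    * R32_UINT dword is unpacked below into four 8-bit channels, which the
    * ISL_UNORM case further down converts to floats.
    */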
   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      if (needs_sign_extension)
         color = nir_format_unpack_sint(b, color, image.bits, image.chans);
      else
         color = nir_format_unpack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      /* On IVB, we rely on the undocumented behavior that typed reads from
       * surfaces of the unsupported R8 and R16 formats return useful data in
       * their least significant bits. However, the data in the high bits is
       * garbage so we have to discard it.
       */
      if (devinfo->verx10 == 70 &&
          (lower_fmt == ISL_FORMAT_R16_UINT ||
           lower_fmt == ISL_FORMAT_R8_UINT))
         color = nir_format_mask_uvec(b, color, lower.bits);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
                                                  image.bits[0]);
      }

      if (needs_sign_extension)
         color = nir_format_sign_extend_ivec(b, color, image.bits);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_unorm_to_float(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_snorm_to_float(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_unpack_half_2x16_split_x(b, color);
      break;

   case ISL_UINT:
   case ISL_SINT:
      break;

   default:
      unreachable("Invalid image channel type");
   }

expand_vec:
   assert(dest_components == 1 || dest_components == 4);
   assert(color->num_components <= dest_components);
   if (color->num_components == dest_components)
      return color;

   nir_def *comps[4];
   for (unsigned i = 0; i < color->num_components; i++)
      comps[i] = nir_channel(b, color, i);

   for (unsigned i = color->num_components; i < 3; i++)
      comps[i] = nir_imm_int(b, 0);

   if (color->num_components < 4) {
      if (isl_format_has_int_channel(image_fmt))
         comps[3] = nir_imm_int(b, 1);
      else
         comps[3] = nir_imm_float(b, 1);
   }

   return nir_vec(b, comps, dest_components);
}

static bool
lower_image_load_instr(nir_builder *b,
                       const struct intel_device_info *devinfo,
                       nir_intrinsic_instr *intrin,
                       bool sparse)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);
      const unsigned dest_components =
         sparse ? (intrin->num_components - 1) : intrin->num_components;

      /* Use an undef to hold the uses of the load while we do the color
       * conversion.
       */
      nir_def *placeholder = nir_undef(b, 4, 32);
      nir_def_rewrite_uses(&intrin->def, placeholder);

      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      intrin->def.num_components = intrin->num_components;

      b->cursor = nir_after_instr(&intrin->instr);

      nir_def *color = convert_color_for_load(b, devinfo,
                                              &intrin->def,
                                              image_fmt, lower_fmt,
                                              dest_components);

      if (sparse) {
         /* Put the sparse component back on the original instruction */
         intrin->num_components++;
         intrin->def.num_components = intrin->num_components;

         /* Carry over the sparse component without modifying it with the
          * converted color.
          */
         nir_def *sparse_color[NIR_MAX_VEC_COMPONENTS];
         for (unsigned i = 0; i < dest_components; i++)
            sparse_color[i] = nir_channel(b, color, i);
         sparse_color[dest_components] =
            nir_channel(b, &intrin->def, intrin->num_components - 1);
         color = nir_vec(b, sparse_color, dest_components + 1);
      }

      nir_def_rewrite_uses(placeholder, color);
      nir_instr_remove(placeholder->parent_instr);
   } else {
      /* This code path is only used prior to Gfx9; there are no plans to
       * enable sparse support there.
       */
      assert(!sparse);

      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;
      const unsigned dest_components = intrin->num_components;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_def *coord = intrin->src[1].ssa;

      nir_def *do_load = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->verx10 == 70) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gfx7 indicates that a surface of
          * type RAW has been bound for untyped access. Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_def *stride = load_image_param(b, deref, STRIDE);
         nir_def *is_raw =
            nir_igt_imm(b, nir_channel(b, stride, 0), 4);
         do_load = nir_iand(b, do_load, is_raw);
      }
      nir_push_if(b, do_load);

      nir_def *addr = image_address(b, devinfo, deref, coord);
      nir_def *load =
         nir_image_deref_load_raw_intel(b, image_fmtl->bpb / 32, 32,
                                        &deref->def, addr);

      nir_push_else(b, NULL);

      nir_def *zero = nir_imm_zero(b, load->num_components, 32);

      nir_pop_if(b, NULL);

      nir_def *value = nir_if_phi(b, load, zero);

      nir_def *color = convert_color_for_load(b, devinfo, value,
                                              image_fmt, raw_fmt,
                                              dest_components);

      nir_def_rewrite_uses(&intrin->def, color);
   }

   return true;
}

static nir_def *
convert_color_for_store(nir_builder *b, const struct intel_device_info *devinfo,
                        nir_def *color,
                        enum isl_format image_fmt, enum isl_format lower_fmt)
{
   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   color = nir_trim_vector(b, color, image.chans);

   if (image_fmt == lower_fmt)
      return color;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      return nir_format_pack_11f11f10f(b, color);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_unorm(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_snorm(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_format_float_to_half(b, color);
      break;

   case ISL_UINT:
      color = nir_format_clamp_uint(b, color, image.bits);
      break;

   case ISL_SINT:
      color = nir_format_clamp_sint(b, color, image.bits);
      break;

   default:
      unreachable("Invalid image channel type");
   }

   if (image.bits[0] < 32 &&
       (isl_format_has_snorm_channel(image_fmt) ||
        isl_format_has_sint_channel(image_fmt)))
      color = nir_format_mask_uvec(b, color, image.bits);

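   /* Illustrative example (assuming the device lowers RGBA8 formats to
    * R32_UINT): when storing to an ISL_FORMAT_R8G8B8A8_UNORM image, the four
    * floats were converted to 8-bit unorm values above, and the pack below
    * merges them into the single R32_UINT dword that actually gets written.
    */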
   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      color = nir_format_pack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
                                                  lower.bits[0]);
      }
   }

   return color;
}

static bool
lower_image_store_instr(nir_builder *b,
                        const struct intel_device_info *devinfo,
                        nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only surfaces, we trust that the hardware can just do the
    * conversion for us.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);

      /* Color conversion goes before the store */
      b->cursor = nir_before_instr(&intrin->instr);

      nir_def *color = convert_color_for_store(b, devinfo,
                                               intrin->src[3].ssa,
                                               image_fmt, lower_fmt);
      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      nir_src_rewrite(&intrin->src[3], color);
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_def *coord = intrin->src[1].ssa;

      nir_def *do_store = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->verx10 == 70) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gfx7 indicates that a surface of
          * type RAW has been bound for untyped access. Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_def *stride = load_image_param(b, deref, STRIDE);
         nir_def *is_raw =
            nir_igt_imm(b, nir_channel(b, stride, 0), 4);
         do_store = nir_iand(b, do_store, is_raw);
      }
      nir_push_if(b, do_store);

      nir_def *addr = image_address(b, devinfo, deref, coord);
      nir_def *color = convert_color_for_store(b, devinfo,
                                               intrin->src[3].ssa,
                                               image_fmt, raw_fmt);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_store_raw_intel);
      store->src[0] = nir_src_for_ssa(&deref->def);
      store->src[1] = nir_src_for_ssa(addr);
      store->src[2] = nir_src_for_ssa(color);
      store->num_components = image_fmtl->bpb / 32;
      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   }

   return true;
}

static bool
lower_image_atomic_instr(nir_builder *b,
                         const struct intel_device_info *devinfo,
                         nir_intrinsic_instr *intrin)
{
   if (devinfo->verx10 >= 75)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* Use an undef to hold the uses of the atomic result while we add the
    * bound check around it.
    */
   nir_def *placeholder = nir_undef(b, 4, 32);
   nir_def_rewrite_uses(&intrin->def, placeholder);

   /* Check the first component of the size field to find out if the
    * image is bound. Necessary on IVB for typed atomics because
    * they don't seem to respect null surfaces and will happily
    * corrupt or read random memory when no image is bound.
    */
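   /* The result is roughly equivalent to:
    *
    *    result = (size.x != 0) ? image_atomic(...) : 0;
    *
    * built out of the if/else and phi below.
    */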
   nir_def *size = load_image_param(b, deref, SIZE);
   nir_def *zero = nir_imm_int(b, 0);
   nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));

   nir_builder_instr_insert(b, &intrin->instr);

   nir_pop_if(b, NULL);

   nir_def *result = nir_if_phi(b, &intrin->def, zero);
   nir_def_rewrite_uses(placeholder, result);

   return true;
}

static bool
lower_image_size_instr(nir_builder *b,
                       const struct intel_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only images, we have an actual image surface so we fall back
    * and let the back-end emit a TXS for this.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   /* If we have a matching typed format, then we have an actual image surface
    * so we fall back and let the back-end emit a TXS for this.
    */
   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);
   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
      return false;

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *size = load_image_param(b, deref, SIZE);

   nir_def *comps[4] = { NULL, NULL, NULL, NULL };

   assert(nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_CUBE);
   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   for (unsigned c = 0; c < coord_comps; c++)
      comps[c] = nir_channel(b, size, c);

   for (unsigned c = coord_comps; c < intrin->def.num_components; ++c)
      comps[c] = nir_imm_int(b, 1);

   nir_def *vec = nir_vec(b, comps, intrin->def.num_components);
   nir_def_rewrite_uses(&intrin->def, vec);

   return true;
}

static bool
elk_nir_lower_storage_image_instr(nir_builder *b,
                                  nir_instr *instr,
                                  void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   const struct elk_nir_lower_storage_image_opts *opts = cb_data;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_deref_load:
      if (opts->lower_loads)
         return lower_image_load_instr(b, opts->devinfo, intrin, false);
      return false;

   case nir_intrinsic_image_deref_sparse_load:
      if (opts->lower_loads)
         return lower_image_load_instr(b, opts->devinfo, intrin, true);
      return false;

   case nir_intrinsic_image_deref_store:
      if (opts->lower_stores)
         return lower_image_store_instr(b, opts->devinfo, intrin);
      return false;

   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
      if (opts->lower_atomics)
         return lower_image_atomic_instr(b, opts->devinfo, intrin);
      return false;

   case nir_intrinsic_image_deref_size:
      if (opts->lower_get_size)
         return lower_image_size_instr(b, opts->devinfo, intrin);
      return false;

   default:
      /* Nothing to do */
      return false;
   }
}

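/* Example usage from a driver (a sketch only; it assumes every lowering is
 * wanted and that "devinfo" and "shader" are already in scope):
 *
 *    const struct elk_nir_lower_storage_image_opts opts = {
 *       .devinfo = devinfo,
 *       .lower_loads = true,
 *       .lower_stores = true,
 *       .lower_atomics = true,
 *       .lower_get_size = true,
 *    };
 *    elk_nir_lower_storage_image(shader, &opts);
 */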
bool
elk_nir_lower_storage_image(nir_shader *shader,
                            const struct elk_nir_lower_storage_image_opts *opts)
{
   bool progress = false;

   const nir_lower_image_options image_options = {
      .lower_cube_size = true,
      .lower_image_samples_to_one = true,
   };

   progress |= nir_lower_image(shader, &image_options);

   progress |= nir_shader_instructions_pass(shader,
                                            elk_nir_lower_storage_image_instr,
                                            nir_metadata_none,
                                            (void *)opts);

   return progress;
}