1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Faith Ekstrand ([email protected])
25 *
26 */
27
28 #include "glsl_types.h"
29 #include "vtn_private.h"
30 #include "nir/nir_vla.h"
31 #include "nir/nir_control_flow.h"
32 #include "nir/nir_constant_expressions.h"
33 #include "nir/nir_deref.h"
34 #include "spirv_info.h"
35
36 #include "util/format/u_format.h"
37 #include "util/u_math.h"
38 #include "util/u_string.h"
39 #include "util/u_debug.h"
40 #include "util/mesa-blake3.h"
41
42 #include <stdio.h>
43
44 /* Table of all implemented capabilities. These are the capabilities that are
45  * implemented in spirv_to_nir, not what the device supports.
46 *
47 * This list should remain alphabetized. For the purposes of alphabetization,
48 * suffixes do not exist and 8 comes before 16.
49 */
50 static const struct spirv_capabilities implemented_capabilities = {
51 .Addresses = true,
52 .AtomicFloat16AddEXT = true,
53 .AtomicFloat32AddEXT = true,
54 .AtomicFloat64AddEXT = true,
55 .AtomicFloat16MinMaxEXT = true,
56 .AtomicFloat32MinMaxEXT = true,
57 .AtomicFloat64MinMaxEXT = true,
58 .AtomicStorage = true,
59 .ClipDistance = true,
60 .ComputeDerivativeGroupLinearKHR = true,
61 .ComputeDerivativeGroupQuadsKHR = true,
62 .CooperativeMatrixKHR = true,
63 .CullDistance = true,
64 .DemoteToHelperInvocation = true,
65 .DenormFlushToZero = true,
66 .DenormPreserve = true,
67 .DerivativeControl = true,
68 .DeviceGroup = true,
69 .DotProduct = true,
70 .DotProductInput4x8Bit = true,
71 .DotProductInput4x8BitPacked = true,
72 .DotProductInputAll = true,
73 .DrawParameters = true,
74 .ExpectAssumeKHR = true,
75 .Float16 = true,
76 .Float16Buffer = true,
77 .Float64 = true,
78 .FloatControls2 = true,
79 .FragmentBarycentricKHR = true,
80 .FragmentDensityEXT = true,
81 .FragmentFullyCoveredEXT = true,
82 .FragmentMaskAMD = true,
83 .FragmentShaderPixelInterlockEXT = true,
84 .FragmentShaderSampleInterlockEXT = true,
85 .FragmentShadingRateKHR = true,
86 .GenericPointer = true,
87 .Geometry = true,
88 .GeometryPointSize = true,
89 .GeometryStreams = true,
90 .GroupNonUniform = true,
91 .GroupNonUniformArithmetic = true,
92 .GroupNonUniformBallot = true,
93 .GroupNonUniformClustered = true,
94 .GroupNonUniformQuad = true,
95 .GroupNonUniformRotateKHR = true,
96 .GroupNonUniformShuffle = true,
97 .GroupNonUniformShuffleRelative = true,
98 .GroupNonUniformVote = true,
99 .Groups = true,
100 .Image1D = true,
101 .ImageBasic = true,
102 .ImageBuffer = true,
103 .ImageCubeArray = true,
104 .ImageGatherBiasLodAMD = true,
105 .ImageGatherExtended = true,
106 .ImageMipmap = true,
107 .ImageMSArray = true,
108 .ImageQuery = true,
109 .ImageReadWrite = true,
110 .ImageReadWriteLodAMD = true,
111 .ImageRect = true,
112 .InputAttachment = true,
113 .InputAttachmentArrayDynamicIndexingEXT = true,
114 .InputAttachmentArrayNonUniformIndexingEXT = true,
115 .Int8 = true,
116 .Int16 = true,
117 .Int64 = true,
118 .Int64Atomics = true,
119 .Int64ImageEXT = true,
120 .IntegerFunctions2INTEL = true,
121 .InterpolationFunction = true,
122 .Kernel = true,
123 .Linkage = true,
124 .LiteralSampler = true,
125 .Matrix = true,
126 .MeshShadingEXT = true,
127 .MeshShadingNV = true,
128 .MinLod = true,
129 .MultiView = true,
130 .MultiViewport = true,
131 .PerViewAttributesNV = true,
132 .PhysicalStorageBufferAddresses = true,
133 .QuadControlKHR = true,
134 .RayCullMaskKHR = true,
135 .RayQueryKHR = true,
136 .RayQueryPositionFetchKHR = true,
137 .RayTracingKHR = true,
138 .RayTracingPositionFetchKHR = true,
139 .RayTraversalPrimitiveCullingKHR = true,
140 .ReplicatedCompositesEXT = true,
141 .RoundingModeRTE = true,
142 .RoundingModeRTZ = true,
143 .RuntimeDescriptorArrayEXT = true,
144 .Sampled1D = true,
145 .SampledBuffer = true,
146 .SampledCubeArray = true,
147 .SampledImageArrayDynamicIndexing = true,
148 .SampledImageArrayNonUniformIndexingEXT = true,
149 .SampledRect = true,
150 .SampleMaskPostDepthCoverage = true,
151 .SampleRateShading = true,
152 .Shader = true,
153 .ShaderClockKHR = true,
154 .ShaderEnqueueAMDX = true,
155 .ShaderLayer = true,
156 .ShaderNonUniformEXT = true,
157 .ShaderSMBuiltinsNV = true,
158 .ShaderViewportIndex = true,
159 .ShaderViewportIndexLayerEXT = true,
160 .ShaderViewportMaskNV = true,
161 .SignedZeroInfNanPreserve = true,
162 .SparseResidency = true,
163 .StencilExportEXT = true,
164 .StorageBuffer8BitAccess = true,
165 .StorageBufferArrayDynamicIndexing = true,
166 .StorageBufferArrayNonUniformIndexingEXT = true,
167 .StorageImageArrayDynamicIndexing = true,
168 .StorageImageArrayNonUniformIndexingEXT = true,
169 .StorageImageExtendedFormats = true,
170 .StorageImageMultisample = true,
171 .StorageImageReadWithoutFormat = true,
172 .StorageImageWriteWithoutFormat = true,
173 .StorageInputOutput16 = true,
174 .StoragePushConstant8 = true,
175 .StoragePushConstant16 = true,
176 .StorageTexelBufferArrayDynamicIndexingEXT = true,
177 .StorageTexelBufferArrayNonUniformIndexingEXT = true,
178 .StorageUniform16 = true,
179 .StorageUniformBufferBlock16 = true,
180 .SubgroupBallotKHR = true,
181 .SubgroupBufferBlockIOINTEL = true,
182 .SubgroupShuffleINTEL = true,
183 .SubgroupVoteKHR = true,
184 .Tessellation = true,
185 .TessellationPointSize = true,
186 .TransformFeedback = true,
187 .UniformAndStorageBuffer8BitAccess = true,
188 .UniformBufferArrayDynamicIndexing = true,
189 .UniformBufferArrayNonUniformIndexingEXT = true,
190 .UniformTexelBufferArrayDynamicIndexingEXT = true,
191 .UniformTexelBufferArrayNonUniformIndexingEXT = true,
192 .VariablePointers = true,
193 .VariablePointersStorageBuffer = true,
194 .Vector16 = true,
195 .VulkanMemoryModel = true,
196 .VulkanMemoryModelDeviceScope = true,
197 .WorkgroupMemoryExplicitLayoutKHR = true,
198 .WorkgroupMemoryExplicitLayout8BitAccessKHR = true,
199 .WorkgroupMemoryExplicitLayout16BitAccessKHR = true,
200 };
201
202 uint32_t mesa_spirv_debug = 0;
203
204 static const struct debug_named_value mesa_spirv_debug_control[] = {
205 { "structured", MESA_SPIRV_DEBUG_STRUCTURED,
206 "Print information about SPIR-V structured control-flow parsing" },
207 { "values", MESA_SPIRV_DEBUG_VALUES,
208 "Print information about SPIR-V values" },
209 { "asm", MESA_SPIRV_DEBUG_ASM, "Print the SPIR-V assembly" },
210 { "color", MESA_SPIRV_DEBUG_COLOR, "Debug in color, if available" },
211 DEBUG_NAMED_VALUE_END,
212 };
213
214 DEBUG_GET_ONCE_FLAGS_OPTION(mesa_spirv_debug, "MESA_SPIRV_DEBUG", mesa_spirv_debug_control, 0)
215
216 /* DO NOT CALL THIS FUNCTION DIRECTLY. Use mesa_spirv_debug_init() instead */
217 static void
218 initialize_mesa_spirv_debug(void)
219 {
220 mesa_spirv_debug = debug_get_option_mesa_spirv_debug();
221 }
222
223 static void
224 mesa_spirv_debug_init(void)
225 {
226 static once_flag initialized_debug_flag = ONCE_FLAG_INIT;
227 call_once(&initialized_debug_flag, initialize_mesa_spirv_debug);
228 }
229
230 #ifndef NDEBUG
231 static enum nir_spirv_debug_level
232 vtn_default_log_level(void)
233 {
234 enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
235 const char *vtn_log_level_strings[] = {
236 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
237 [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
238 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
239 };
240 const char *str = getenv("MESA_SPIRV_LOG_LEVEL");
241
242 if (str == NULL)
243 return level;
244
245 for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
246 if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
247 level = i;
248 break;
249 }
250 }
251
252 return level;
253 }
254 #endif
255
256 void
257 vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
258 size_t spirv_offset, const char *message)
259 {
260 if (b->options->debug.func) {
261 b->options->debug.func(b->options->debug.private_data,
262 level, spirv_offset, message);
263 }
264
265 #ifndef NDEBUG
266 static enum nir_spirv_debug_level default_level =
267 NIR_SPIRV_DEBUG_LEVEL_INVALID;
268
269 if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
270 default_level = vtn_default_log_level();
271
272 if (level >= default_level)
273 fprintf(stderr, "%s\n", message);
274 #endif
275 }
276
277 void
278 vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
279 size_t spirv_offset, const char *fmt, ...)
280 {
281 va_list args;
282 char *msg;
283
284 va_start(args, fmt);
285 msg = ralloc_vasprintf(NULL, fmt, args);
286 va_end(args);
287
288 vtn_log(b, level, spirv_offset, msg);
289
290 ralloc_free(msg);
291 }
292
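/* Formats a complete diagnostic: the given prefix, the vtn caller's
 * file/line (debug builds only), the formatted message, the byte offset
 * into the SPIR-V binary, and any OpLine source location, then forwards
 * it to vtn_log().
 */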
293 static void
294 vtn_log_err(struct vtn_builder *b,
295 enum nir_spirv_debug_level level, const char *prefix,
296 const char *file, unsigned line,
297 const char *fmt, va_list args)
298 {
299 char *msg;
300
301 msg = ralloc_strdup(NULL, prefix);
302
303 #ifndef NDEBUG
304 ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
305 #endif
306
307 ralloc_asprintf_append(&msg, " ");
308
309 ralloc_vasprintf_append(&msg, fmt, args);
310
311 ralloc_asprintf_append(&msg, "\n %zu bytes into the SPIR-V binary",
312 b->spirv_offset);
313
314 if (b->file) {
315 ralloc_asprintf_append(&msg,
316 "\n in SPIR-V source file %s, line %d, col %d",
317 b->file, b->line, b->col);
318 }
319
320 vtn_log(b, level, b->spirv_offset, msg);
321
322 ralloc_free(msg);
323 }
324
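/* Writes the raw SPIR-V words to "<path>/<prefix>-<idx>.spirv" so that a
 * problematic module can be inspected offline.
 */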
325 static void
326 vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
327 {
328 static int idx = 0;
329
330 char filename[1024];
331 int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
332 path, prefix, idx++);
333 if (len < 0 || len >= sizeof(filename))
334 return;
335
336 FILE *f = fopen(filename, "wb");
337 if (f == NULL)
338 return;
339
340 fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
341 fclose(f);
342
343 vtn_info("SPIR-V shader dumped to %s", filename);
344 }
345
346 void
347 _vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
348 const char *fmt, ...)
349 {
350 va_list args;
351
352 va_start(args, fmt);
353 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
354 file, line, fmt, args);
355 va_end(args);
356 }
357
358 void
359 _vtn_err(struct vtn_builder *b, const char *file, unsigned line,
360 const char *fmt, ...)
361 {
362 va_list args;
363
364 va_start(args, fmt);
365 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
366 file, line, fmt, args);
367 va_end(args);
368 }
369
370 void
371 _vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
372 const char *fmt, ...)
373 {
374 va_list args;
375
376 if (MESA_SPIRV_DEBUG(VALUES))
377 vtn_dump_values(b, stderr);
378
379 va_start(args, fmt);
380 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
381 file, line, fmt, args);
382 va_end(args);
383
384 const char *dump_path = secure_getenv("MESA_SPIRV_FAIL_DUMP_PATH");
385 if (dump_path)
386 vtn_dump_shader(b, dump_path, "fail");
387
388 #ifndef NDEBUG
389 if (!b->options->skip_os_break_in_debug_build)
390 os_break();
391 #endif
392
393 vtn_longjmp(b->fail_jump, 1);
394 }
395
396 const char *
397 vtn_value_type_to_string(enum vtn_value_type t)
398 {
399 #define CASE(typ) case vtn_value_type_##typ: return #typ
400 switch (t) {
401 CASE(invalid);
402 CASE(undef);
403 CASE(string);
404 CASE(decoration_group);
405 CASE(type);
406 CASE(constant);
407 CASE(pointer);
408 CASE(function);
409 CASE(block);
410 CASE(ssa);
411 CASE(extension);
412 CASE(image_pointer);
413 }
414 #undef CASE
415 unreachable("unknown value type");
416 return "UNKNOWN";
417 }
418
419 static const char *
420 vtn_base_type_to_string(enum vtn_base_type t)
421 {
422 #define CASE(typ) case vtn_base_type_##typ: return #typ
423 switch (t) {
424 CASE(void);
425 CASE(scalar);
426 CASE(vector);
427 CASE(matrix);
428 CASE(array);
429 CASE(struct);
430 CASE(pointer);
431 CASE(image);
432 CASE(sampler);
433 CASE(sampled_image);
434 CASE(accel_struct);
435 CASE(ray_query);
436 CASE(function);
437 CASE(event);
438 CASE(cooperative_matrix);
439 }
440 #undef CASE
441 unreachable("unknown base type");
442 return "UNKNOWN";
443 }
444
445
446 void
447 _vtn_fail_value_type_mismatch(struct vtn_builder *b, uint32_t value_id,
448 enum vtn_value_type value_type)
449 {
450 struct vtn_value *val = vtn_untyped_value(b, value_id);
451 vtn_fail(
452 "SPIR-V id %u is the wrong kind of value: "
453 "expected '%s' but got '%s'",
454 vtn_id_for_value(b, val),
455 vtn_value_type_to_string(value_type),
456 vtn_value_type_to_string(val->value_type));
457 }
458
459 void _vtn_fail_value_not_pointer(struct vtn_builder *b,
460 uint32_t value_id)
461 {
462 struct vtn_value *val = vtn_untyped_value(b, value_id);
463 vtn_fail("SPIR-V id %u is the wrong kind of value: "
464 "expected 'pointer' OR null constant but got "
465 "'%s' (%s)", value_id,
466 vtn_value_type_to_string(val->value_type),
467 val->is_null_constant ? "null constant" : "not null constant");
468 }
469
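/* Builds a vtn_ssa_value whose leaves are NIR undefs, recursing through
 * arrays, matrices, and structs. Cooperative-matrix values are backed by
 * a temporary variable instead of a plain SSA def.
 */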
470 static struct vtn_ssa_value *
471 vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
472 {
473 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
474 val->type = glsl_get_bare_type(type);
475
476 if (glsl_type_is_cmat(type)) {
477 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_undef");
478 vtn_set_ssa_value_var(b, val, mat->var);
479 } else if (glsl_type_is_vector_or_scalar(type)) {
480 unsigned num_components = glsl_get_vector_elements(val->type);
481 unsigned bit_size = glsl_get_bit_size(val->type);
482 val->def = nir_undef(&b->nb, num_components, bit_size);
483 } else {
484 unsigned elems = glsl_get_length(val->type);
485 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
486 if (glsl_type_is_array_or_matrix(type)) {
487 const struct glsl_type *elem_type = glsl_get_array_element(type);
488 for (unsigned i = 0; i < elems; i++)
489 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
490 } else {
491 vtn_assert(glsl_type_is_struct_or_ifc(type));
492 for (unsigned i = 0; i < elems; i++) {
493 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
494 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
495 }
496 }
497 }
498
499 return val;
500 }
501
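/* Turns a nir_constant into a vtn_ssa_value by emitting immediates that
 * mirror the structure of the given GLSL type.
 */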
502 struct vtn_ssa_value *
503 vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
504 const struct glsl_type *type)
505 {
506 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
507 val->type = glsl_get_bare_type(type);
508
509 if (glsl_type_is_cmat(type)) {
510 const struct glsl_type *element_type = glsl_get_cmat_element(type);
511
512 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type, "cmat_constant");
513 nir_cmat_construct(&b->nb, &mat->def,
514 nir_build_imm(&b->nb, 1, glsl_get_bit_size(element_type),
515 constant->values));
516 vtn_set_ssa_value_var(b, val, mat->var);
517 } else if (glsl_type_is_vector_or_scalar(type)) {
518 val->def = nir_build_imm(&b->nb, glsl_get_vector_elements(val->type),
519 glsl_get_bit_size(val->type),
520 constant->values);
521 } else {
522 unsigned elems = glsl_get_length(val->type);
523 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
524 if (glsl_type_is_array_or_matrix(type)) {
525 const struct glsl_type *elem_type = glsl_get_array_element(type);
526 for (unsigned i = 0; i < elems; i++) {
527 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
528 elem_type);
529 }
530 } else {
531 vtn_assert(glsl_type_is_struct_or_ifc(type));
532 for (unsigned i = 0; i < elems; i++) {
533 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
534 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
535 elem_type);
536 }
537 }
538 }
539
540 return val;
541 }
542
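/* Returns the SSA value for a SPIR-V id, lazily materializing undef,
 * constant, and pointer values as needed.
 */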
543 struct vtn_ssa_value *
544 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
545 {
546 struct vtn_value *val = vtn_untyped_value(b, value_id);
547 switch (val->value_type) {
548 case vtn_value_type_undef:
549 return vtn_undef_ssa_value(b, val->type->type);
550
551 case vtn_value_type_constant:
552 return vtn_const_ssa_value(b, val->constant, val->type->type);
553
554 case vtn_value_type_ssa:
555 return val->ssa;
556
557 case vtn_value_type_pointer:
558 vtn_assert(val->pointer->type && val->pointer->type->type);
559 struct vtn_ssa_value *ssa =
560 vtn_create_ssa_value(b, val->pointer->type->type);
561 ssa->def = vtn_pointer_to_ssa(b, val->pointer);
562 return ssa;
563
564 default:
565 vtn_fail("Invalid type for an SSA value");
566 }
567 }
568
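/* Associates an SSA value with a SPIR-V result id. Pointer-typed values
 * are converted back into a vtn_pointer so later pointer operations keep
 * working.
 */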
569 struct vtn_value *
570 vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
571 struct vtn_ssa_value *ssa)
572 {
573 struct vtn_type *type = vtn_get_value_type(b, value_id);
574
575 /* See vtn_create_ssa_value */
576 vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
577 "Type mismatch for SPIR-V value %%%u", value_id);
578
579 struct vtn_value *val;
580 if (type->base_type == vtn_base_type_pointer) {
581 val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
582 } else {
583 /* Don't trip the value_type_ssa check in vtn_push_value */
584 val = vtn_push_value(b, value_id, vtn_value_type_invalid);
585 val->value_type = vtn_value_type_ssa;
586 val->ssa = ssa;
587 }
588
589 return val;
590 }
591
592 nir_def *
593 vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
594 {
595 struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
596 vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
597 "Expected a vector or scalar type");
598 return ssa->def;
599 }
600
601 struct vtn_value *
602 vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_def *def)
603 {
604 /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
605 * type will be valid by the time we get here.
606 */
607 struct vtn_type *type = vtn_get_value_type(b, value_id);
608 vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
609 def->bit_size != glsl_get_bit_size(type->type),
610 "Mismatch between NIR and SPIR-V type.");
611 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
612 ssa->def = def;
613 return vtn_push_ssa_value(b, value_id, ssa);
614 }
615
616 nir_deref_instr *
617 vtn_get_deref_for_id(struct vtn_builder *b, uint32_t value_id)
618 {
619 return vtn_get_deref_for_ssa_value(b, vtn_ssa_value(b, value_id));
620 }
621
622 nir_deref_instr *
623 vtn_get_deref_for_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *ssa)
624 {
625 vtn_fail_if(!ssa->is_variable, "Expected an SSA value with a nir_variable");
626 return nir_build_deref_var(&b->nb, ssa->var);
627 }
628
629 struct vtn_value *
630 vtn_push_var_ssa(struct vtn_builder *b, uint32_t value_id, nir_variable *var)
631 {
632 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, var->type);
633 vtn_set_ssa_value_var(b, ssa, var);
634 return vtn_push_ssa_value(b, value_id, ssa);
635 }
636
637 static enum gl_access_qualifier
638 spirv_to_gl_access_qualifier(struct vtn_builder *b,
639 SpvAccessQualifier access_qualifier)
640 {
641 switch (access_qualifier) {
642 case SpvAccessQualifierReadOnly:
643 return ACCESS_NON_WRITEABLE;
644 case SpvAccessQualifierWriteOnly:
645 return ACCESS_NON_READABLE;
646 case SpvAccessQualifierReadWrite:
647 return 0;
648 default:
649 vtn_fail("Invalid image access qualifier");
650 }
651 }
652
653 static nir_deref_instr *
654 vtn_get_image(struct vtn_builder *b, uint32_t value_id,
655 enum gl_access_qualifier *access)
656 {
657 struct vtn_type *type = vtn_get_value_type(b, value_id);
658 vtn_assert(type->base_type == vtn_base_type_image);
659 if (access)
660 *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
661 nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
662 nir_var_image : nir_var_uniform;
663 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
664 mode, type->glsl_image, 0);
665 }
666
667 static void
668 vtn_push_image(struct vtn_builder *b, uint32_t value_id,
669 nir_deref_instr *deref, bool propagate_non_uniform)
670 {
671 struct vtn_type *type = vtn_get_value_type(b, value_id);
672 vtn_assert(type->base_type == vtn_base_type_image);
673 struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->def);
674 value->propagated_non_uniform = propagate_non_uniform;
675 }
676
677 static nir_deref_instr *
678 vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
679 {
680 struct vtn_type *type = vtn_get_value_type(b, value_id);
681 vtn_assert(type->base_type == vtn_base_type_sampler);
682 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
683 nir_var_uniform, glsl_bare_sampler_type(), 0);
684 }
685
686 nir_def *
687 vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
688 struct vtn_sampled_image si)
689 {
690 return nir_vec2(&b->nb, &si.image->def, &si.sampler->def);
691 }
692
693 static void
694 vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
695 struct vtn_sampled_image si, bool propagate_non_uniform)
696 {
697 struct vtn_type *type = vtn_get_value_type(b, value_id);
698 vtn_assert(type->base_type == vtn_base_type_sampled_image);
699 struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
700 vtn_sampled_image_to_nir_ssa(b, si));
701 value->propagated_non_uniform = propagate_non_uniform;
702 }
703
704 static struct vtn_sampled_image
705 vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
706 {
707 struct vtn_type *type = vtn_get_value_type(b, value_id);
708 vtn_assert(type->base_type == vtn_base_type_sampled_image);
709 nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id);
710
711 /* Even though this is a sampled image, we can end up here with a storage
712 * image because OpenCL doesn't distinguish between the two.
713 */
714 const struct glsl_type *image_type = type->image->glsl_image;
715 nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
716 nir_var_image : nir_var_uniform;
717
718 struct vtn_sampled_image si = { NULL, };
719 si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
720 image_mode, image_type, 0);
721 si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
722 nir_var_uniform,
723 glsl_bare_sampler_type(), 0);
724 return si;
725 }
726
727 const char *
728 vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
729 unsigned word_count, unsigned *words_used)
730 {
731 /* From the SPIR-V spec:
732 *
733 * "A string is interpreted as a nul-terminated stream of characters.
734 * The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
735 * octets (8-bit bytes) are packed four per word, following the
736 * little-endian convention (i.e., the first octet is in the
737 * lowest-order 8 bits of the word). The final word contains the
738 * string’s nul-termination character (0), and all contents past the
739 * end of the string in the final word are padded with 0."
740 *
741 * On big-endian, we need to byte-swap.
742 */
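/* For example, the literal "abc" occupies a single word with the value
 * 0x00636261: 'a' in the low byte, then 'b', 'c', and the terminating 0
 * in the top byte.
 */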
743 #if UTIL_ARCH_BIG_ENDIAN
744 {
745 uint32_t *copy = vtn_alloc_array(b, uint32_t, word_count);
746 for (unsigned i = 0; i < word_count; i++)
747 copy[i] = util_bswap32(words[i]);
748 words = copy;
749 }
750 #endif
751
752 const char *str = (const char *)words;
753 const char *end = memchr(str, 0, word_count * 4);
754 vtn_fail_if(end == NULL, "String is not null-terminated");
755
756 if (words_used)
757 *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));
758
759 return str;
760 }
761
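/* Walks the SPIR-V word stream from start to end, dispatching each
 * instruction to the given handler. OpLine/OpNoLine update the debug
 * source location as a side effect. If the handler returns false,
 * iteration stops early and the current word pointer is returned.
 */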
762 const uint32_t *
763 vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
764 const uint32_t *end, vtn_instruction_handler handler)
765 {
766 const uint32_t *w = start;
767 while (w < end) {
768 SpvOp opcode = w[0] & SpvOpCodeMask;
769 unsigned count = w[0] >> SpvWordCountShift;
770 vtn_assert(count >= 1 && w + count <= end);
771
772 b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;
773
774 switch (opcode) {
775 case SpvOpNop:
776 break; /* Do nothing */
777
778 case SpvOpLine:
779 b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
780 b->line = w[2];
781 b->col = w[3];
782 break;
783
784 case SpvOpNoLine:
785 b->file = NULL;
786 b->line = -1;
787 b->col = -1;
788 break;
789
790 default:
791 if (!handler(b, opcode, w, count))
792 return w;
793 break;
794 }
795
796 w += count;
797 }
798
799 assert(w == end);
800 return w;
801 }
802
803 static bool
804 vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
805 const uint32_t *w, unsigned count)
806 {
807 /* Do nothing. */
808 return true;
809 }
810
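/* Handles OpExtInstImport by selecting a handler for the named extended
 * instruction set, and OpExtInst(WithForwardRefsKHR) by dispatching to
 * that handler.
 */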
811 static void
812 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
813 const uint32_t *w, unsigned count)
814 {
815 switch (opcode) {
816 case SpvOpExtInstImport: {
817 struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
818 const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
819 if (strcmp(ext, "GLSL.std.450") == 0) {
820 val->ext_handler = vtn_handle_glsl450_instruction;
821 } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
822 && (b->options && b->options->amd_gcn_shader)) {
823 val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
824 } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
825 && (b->options && b->options->amd_shader_ballot)) {
826 val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
827 } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
828 && (b->options && b->options->amd_trinary_minmax)) {
829 val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
830 } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
831 && (b->options && b->options->amd_shader_explicit_vertex_parameter)) {
832 val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
833 } else if (strcmp(ext, "OpenCL.std") == 0) {
834 val->ext_handler = vtn_handle_opencl_instruction;
835 } else if (strstr(ext, "NonSemantic.") == ext) {
836 val->ext_handler = vtn_handle_non_semantic_instruction;
837 } else {
838 vtn_fail("Unsupported extension: %s", ext);
839 }
840 break;
841 }
842
843 case SpvOpExtInst:
844 case SpvOpExtInstWithForwardRefsKHR: {
845 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
846
847 if (opcode == SpvOpExtInstWithForwardRefsKHR)
848 assert(val->ext_handler == vtn_handle_non_semantic_instruction);
849
850 bool handled = val->ext_handler(b, w[4], w, count);
851 vtn_assert(handled);
852 break;
853 }
854
855 default:
856 vtn_fail_with_opcode("Unhandled opcode", opcode);
857 }
858 }
859
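/* Helper for vtn_foreach_decoration(): resolves member scoping and
 * recurses into decoration groups before invoking the callback.
 */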
860 static void
861 _foreach_decoration_helper(struct vtn_builder *b,
862 struct vtn_value *base_value,
863 int parent_member,
864 struct vtn_value *value,
865 vtn_decoration_foreach_cb cb, void *data)
866 {
867 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
868 int member;
869 if (dec->scope == VTN_DEC_DECORATION) {
870 member = parent_member;
871 } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
872 vtn_fail_if(value->value_type != vtn_value_type_type ||
873 value->type->base_type != vtn_base_type_struct,
874 "OpMemberDecorate and OpGroupMemberDecorate are only "
875 "allowed on OpTypeStruct");
876 /* This means we haven't recursed yet */
877 assert(value == base_value);
878
879 member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
880
881 vtn_fail_if(member >= base_value->type->length,
882 "OpMemberDecorate specifies member %d but the "
883 "OpTypeStruct has only %u members",
884 member, base_value->type->length);
885 } else {
886 /* Not a decoration */
887 assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
888 dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
889 continue;
890 }
891
892 if (dec->group) {
893 assert(dec->group->value_type == vtn_value_type_decoration_group);
894 _foreach_decoration_helper(b, base_value, member, dec->group,
895 cb, data);
896 } else {
897 cb(b, base_value, member, dec, data);
898 }
899 }
900 }
901
902 /** Iterates (recursively if needed) over all of the decorations on a value
903 *
904 * This function iterates over all of the decorations applied to a given
905 * value. If it encounters a decoration group, it recurses into the group
906 * and iterates over all of those decorations as well.
907 */
908 void
909 vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
910 vtn_decoration_foreach_cb cb, void *data)
911 {
912 _foreach_decoration_helper(b, value, -1, value, cb, data);
913 }
914
915 void
916 vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
917 vtn_execution_mode_foreach_cb cb, void *data)
918 {
919 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
920 if (dec->scope != VTN_DEC_EXECUTION_MODE)
921 continue;
922
923 assert(dec->group == NULL);
924 cb(b, value, dec, data);
925 }
926 }
927
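/* Records decorations, member names, and execution modes as
 * vtn_decoration entries linked onto the target value; they are consumed
 * later via the vtn_foreach_* iterators above.
 */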
928 void
929 vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
930 const uint32_t *w, unsigned count)
931 {
932 const uint32_t *w_end = w + count;
933 const uint32_t target = w[1];
934 w += 2;
935
936 switch (opcode) {
937 case SpvOpDecorationGroup:
938 vtn_push_value(b, target, vtn_value_type_decoration_group);
939 break;
940
941 case SpvOpDecorate:
942 case SpvOpDecorateId:
943 case SpvOpMemberDecorate:
944 case SpvOpDecorateString:
945 case SpvOpMemberDecorateString:
946 case SpvOpExecutionMode:
947 case SpvOpExecutionModeId: {
948 struct vtn_value *val = vtn_untyped_value(b, target);
949
950 struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
951 switch (opcode) {
952 case SpvOpDecorate:
953 case SpvOpDecorateId:
954 case SpvOpDecorateString:
955 dec->scope = VTN_DEC_DECORATION;
956 break;
957 case SpvOpMemberDecorate:
958 case SpvOpMemberDecorateString:
959 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
960 vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
961 "Member argument of OpMemberDecorate too large");
962 break;
963 case SpvOpExecutionMode:
964 case SpvOpExecutionModeId:
965 dec->scope = VTN_DEC_EXECUTION_MODE;
966 break;
967 default:
968 unreachable("Invalid decoration opcode");
969 }
970 dec->decoration = *(w++);
971 dec->num_operands = w_end - w;
972 dec->operands = w;
973
974 /* Link into the list */
975 dec->next = val->decoration;
976 val->decoration = dec;
977 break;
978 }
979
980 case SpvOpMemberName: {
981 struct vtn_value *val = vtn_untyped_value(b, target);
982 struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
983
984 dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);
985
986 dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);
987
988 dec->next = val->decoration;
989 val->decoration = dec;
990 break;
991 }
992
993 case SpvOpGroupMemberDecorate:
994 case SpvOpGroupDecorate: {
995 struct vtn_value *group =
996 vtn_value(b, target, vtn_value_type_decoration_group);
997
998 for (; w < w_end; w++) {
999 struct vtn_value *val = vtn_untyped_value(b, *w);
1000 struct vtn_decoration *dec = vtn_zalloc(b, struct vtn_decoration);
1001
1002 dec->group = group;
1003 if (opcode == SpvOpGroupDecorate) {
1004 dec->scope = VTN_DEC_DECORATION;
1005 } else {
1006 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
1007 vtn_fail_if(dec->scope < 0, /* Check for overflow */
1008 "Member argument of OpGroupMemberDecorate too large");
1009 }
1010
1011 /* Link into the list */
1012 dec->next = val->decoration;
1013 val->decoration = dec;
1014 }
1015 break;
1016 }
1017
1018 default:
1019 unreachable("Unhandled opcode");
1020 }
1021 }
1022
1023 struct member_decoration_ctx {
1024 unsigned num_fields;
1025 struct glsl_struct_field *fields;
1026 struct vtn_type *type;
1027 };
1028
1029 /**
1030 * Returns true if the given type contains a struct decorated Block or
1031 * BufferBlock
1032 */
1033 bool
1034 vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
1035 {
1036 switch (type->base_type) {
1037 case vtn_base_type_array:
1038 return vtn_type_contains_block(b, type->array_element);
1039 case vtn_base_type_struct:
1040 if (type->block || type->buffer_block)
1041 return true;
1042 for (unsigned i = 0; i < type->length; i++) {
1043 if (vtn_type_contains_block(b, type->members[i]))
1044 return true;
1045 }
1046 return false;
1047 default:
1048 return false;
1049 }
1050 }
1051
1052 /** Returns true if two types are "compatible", i.e. you can do an OpLoad,
1053 * OpStore, or OpCopyMemory between them without breaking anything.
1054 * Technically, the SPIR-V rules require the exact same type ID but this lets
1055 * us internally be a bit looser.
1056 */
1057 bool
1058 vtn_types_compatible(struct vtn_builder *b,
1059 struct vtn_type *t1, struct vtn_type *t2)
1060 {
1061 if (t1->id == t2->id)
1062 return true;
1063
1064 if (t1->base_type != t2->base_type)
1065 return false;
1066
1067 switch (t1->base_type) {
1068 case vtn_base_type_void:
1069 case vtn_base_type_scalar:
1070 case vtn_base_type_vector:
1071 case vtn_base_type_matrix:
1072 case vtn_base_type_image:
1073 case vtn_base_type_sampler:
1074 case vtn_base_type_sampled_image:
1075 case vtn_base_type_event:
1076 case vtn_base_type_cooperative_matrix:
1077 return t1->type == t2->type;
1078
1079 case vtn_base_type_array:
1080 return t1->length == t2->length &&
1081 vtn_types_compatible(b, t1->array_element, t2->array_element);
1082
1083 case vtn_base_type_pointer:
1084 return vtn_types_compatible(b, t1->pointed, t2->pointed);
1085
1086 case vtn_base_type_struct:
1087 if (t1->length != t2->length)
1088 return false;
1089
1090 for (unsigned i = 0; i < t1->length; i++) {
1091 if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
1092 return false;
1093 }
1094 return true;
1095
1096 case vtn_base_type_accel_struct:
1097 case vtn_base_type_ray_query:
1098 return true;
1099
1100 case vtn_base_type_function:
1101 /* This case shouldn't get hit since you can't copy around function
1102 * types. Just require them to be identical.
1103 */
1104 return false;
1105 }
1106
1107 vtn_fail("Invalid base type");
1108 }
1109
1110 struct vtn_type *
1111 vtn_type_without_array(struct vtn_type *type)
1112 {
1113 while (type->base_type == vtn_base_type_array)
1114 type = type->array_element;
1115 return type;
1116 }
1117
1118 /* does a shallow copy of a vtn_type */
1119
1120 static struct vtn_type *
1121 vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
1122 {
1123 struct vtn_type *dest = vtn_alloc(b, struct vtn_type);
1124 *dest = *src;
1125
1126 switch (src->base_type) {
1127 case vtn_base_type_void:
1128 case vtn_base_type_scalar:
1129 case vtn_base_type_vector:
1130 case vtn_base_type_matrix:
1131 case vtn_base_type_array:
1132 case vtn_base_type_pointer:
1133 case vtn_base_type_image:
1134 case vtn_base_type_sampler:
1135 case vtn_base_type_sampled_image:
1136 case vtn_base_type_event:
1137 case vtn_base_type_accel_struct:
1138 case vtn_base_type_ray_query:
1139 case vtn_base_type_cooperative_matrix:
1140 /* Nothing more to do */
1141 break;
1142
1143 case vtn_base_type_struct:
1144 dest->members = vtn_alloc_array(b, struct vtn_type *, src->length);
1145 memcpy(dest->members, src->members,
1146 src->length * sizeof(src->members[0]));
1147
1148 dest->offsets = vtn_alloc_array(b, unsigned, src->length);
1149 memcpy(dest->offsets, src->offsets,
1150 src->length * sizeof(src->offsets[0]));
1151 break;
1152
1153 case vtn_base_type_function:
1154 dest->params = vtn_alloc_array(b, struct vtn_type *, src->length);
1155 memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
1156 break;
1157 }
1158
1159 return dest;
1160 }
1161
1162 static bool
1163 vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
1164 enum vtn_variable_mode mode)
1165 {
1166 /* For OpenCL we never want to strip the info from the types, and it makes
1167 * type comparisons easier in later stages.
1168 */
1169 if (b->options->environment == NIR_SPIRV_OPENCL)
1170 return true;
1171
1172 switch (mode) {
1173 case vtn_variable_mode_input:
1174 case vtn_variable_mode_output:
1175 /* Layout decorations kept because we need offsets for XFB arrays of
1176 * blocks.
1177 */
1178 return b->shader->info.has_transform_feedback_varyings;
1179
1180 case vtn_variable_mode_ssbo:
1181 case vtn_variable_mode_phys_ssbo:
1182 case vtn_variable_mode_ubo:
1183 case vtn_variable_mode_push_constant:
1184 case vtn_variable_mode_shader_record:
1185 return true;
1186
1187 case vtn_variable_mode_workgroup:
1188 return b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR;
1189
1190 default:
1191 return false;
1192 }
1193 }
1194
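/* Returns the glsl_type NIR should use for a variable of the given
 * vtn_type in the given mode. Atomic counters, uniforms (textures and
 * samplers), and image variables get special handling, and explicit
 * layout information is stripped when the mode does not require it.
 */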
1195 const struct glsl_type *
1196 vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
1197 enum vtn_variable_mode mode)
1198 {
1199 if (mode == vtn_variable_mode_atomic_counter) {
1200 vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
1201 "Variables in the AtomicCounter storage class should be "
1202 "(possibly arrays of arrays of) uint.");
1203 return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
1204 }
1205
1206 if (mode == vtn_variable_mode_uniform) {
1207 switch (type->base_type) {
1208 case vtn_base_type_array: {
1209 const struct glsl_type *elem_type =
1210 vtn_type_get_nir_type(b, type->array_element, mode);
1211
1212 return glsl_array_type(elem_type, type->length,
1213 glsl_get_explicit_stride(type->type));
1214 }
1215
1216 case vtn_base_type_struct: {
1217 bool need_new_struct = false;
1218 const uint32_t num_fields = type->length;
1219 NIR_VLA(struct glsl_struct_field, fields, num_fields);
1220 for (unsigned i = 0; i < num_fields; i++) {
1221 fields[i] = *glsl_get_struct_field_data(type->type, i);
1222 const struct glsl_type *field_nir_type =
1223 vtn_type_get_nir_type(b, type->members[i], mode);
1224 if (fields[i].type != field_nir_type) {
1225 fields[i].type = field_nir_type;
1226 need_new_struct = true;
1227 }
1228 }
1229 if (need_new_struct) {
1230 if (glsl_type_is_interface(type->type)) {
1231 return glsl_interface_type(fields, num_fields,
1232 /* packing */ 0, false,
1233 glsl_get_type_name(type->type));
1234 } else {
1235 return glsl_struct_type(fields, num_fields,
1236 glsl_get_type_name(type->type),
1237 glsl_struct_type_is_packed(type->type));
1238 }
1239 } else {
1240 /* No changes, just pass it on */
1241 return type->type;
1242 }
1243 }
1244
1245 case vtn_base_type_image:
1246 vtn_assert(glsl_type_is_texture(type->glsl_image));
1247 return type->glsl_image;
1248
1249 case vtn_base_type_sampler:
1250 return glsl_bare_sampler_type();
1251
1252 case vtn_base_type_sampled_image:
1253 return glsl_texture_type_to_sampler(type->image->glsl_image,
1254 false /* is_shadow */);
1255
1256 default:
1257 return type->type;
1258 }
1259 }
1260
1261 if (mode == vtn_variable_mode_image) {
1262 struct vtn_type *image_type = vtn_type_without_array(type);
1263 vtn_assert(image_type->base_type == vtn_base_type_image);
1264 return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
1265 }
1266
1267 /* Layout decorations are allowed but ignored in certain conditions,
1268  * to allow SPIR-V generators to perform type deduplication. Discard
1269 * unnecessary ones when passing to NIR.
1270 */
1271 if (!vtn_type_needs_explicit_layout(b, type, mode))
1272 return glsl_get_bare_type(type->type);
1273
1274 return type->type;
1275 }
1276
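/* Returns a mutable copy of a (possibly arrayed) matrix member so that
 * decorations such as RowMajor or MatrixStride can be applied without
 * mutating type objects shared with other SPIR-V ids.
 */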
1277 static struct vtn_type *
1278 mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
1279 {
1280 type->members[member] = vtn_type_copy(b, type->members[member]);
1281 type = type->members[member];
1282
1283 /* We may have an array of matrices.... Oh, joy! */
1284 while (glsl_type_is_array(type->type)) {
1285 type->array_element = vtn_type_copy(b, type->array_element);
1286 type = type->array_element;
1287 }
1288
1289 vtn_assert(glsl_type_is_matrix(type->type));
1290
1291 return type;
1292 }
1293
1294 static void
1295 vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
1296 int member, enum gl_access_qualifier access)
1297 {
1298 type->members[member] = vtn_type_copy(b, type->members[member]);
1299 type = type->members[member];
1300
1301 type->access |= access;
1302 }
1303
1304 static void
1305 array_stride_decoration_cb(struct vtn_builder *b,
1306 struct vtn_value *val, int member,
1307 const struct vtn_decoration *dec, void *void_ctx)
1308 {
1309 struct vtn_type *type = val->type;
1310
1311 if (dec->decoration == SpvDecorationArrayStride) {
1312 if (vtn_type_contains_block(b, type)) {
1313 vtn_warn("The ArrayStride decoration cannot be applied to an array "
1314 "type which contains a structure type decorated Block "
1315 "or BufferBlock");
1316 /* Ignore the decoration */
1317 } else {
1318 vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
1319 type->stride = dec->operands[0];
1320 }
1321 }
1322 }
1323
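/* Decoration callback that applies member decorations to the
 * glsl_struct_field array and the vtn_type being assembled for an
 * OpTypeStruct.
 */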
1324 static void
1325 struct_member_decoration_cb(struct vtn_builder *b,
1326 UNUSED struct vtn_value *val, int member,
1327 const struct vtn_decoration *dec, void *void_ctx)
1328 {
1329 struct member_decoration_ctx *ctx = void_ctx;
1330
1331 if (member < 0)
1332 return;
1333
1334 assert(member < ctx->num_fields);
1335
1336 switch (dec->decoration) {
1337 case SpvDecorationRelaxedPrecision:
1338 case SpvDecorationUniform:
1339 case SpvDecorationUniformId:
1340 break; /* FIXME: Do nothing with this for now. */
1341 case SpvDecorationNonWritable:
1342 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
1343 break;
1344 case SpvDecorationNonReadable:
1345 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
1346 break;
1347 case SpvDecorationVolatile:
1348 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
1349 break;
1350 case SpvDecorationCoherent:
1351 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
1352 break;
1353 case SpvDecorationNoPerspective:
1354 ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
1355 break;
1356 case SpvDecorationFlat:
1357 ctx->fields[member].interpolation = INTERP_MODE_FLAT;
1358 break;
1359 case SpvDecorationExplicitInterpAMD:
1360 ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
1361 break;
1362 case SpvDecorationCentroid:
1363 ctx->fields[member].centroid = true;
1364 break;
1365 case SpvDecorationSample:
1366 ctx->fields[member].sample = true;
1367 break;
1368 case SpvDecorationStream:
1369 /* This is handled later by var_decoration_cb in vtn_variables.c */
1370 break;
1371 case SpvDecorationLocation:
1372 ctx->fields[member].location = dec->operands[0];
1373 break;
1374 case SpvDecorationComponent:
1375 break; /* FIXME: What should we do with these? */
1376 case SpvDecorationBuiltIn:
1377 ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
1378 ctx->type->members[member]->is_builtin = true;
1379 ctx->type->members[member]->builtin = dec->operands[0];
1380 ctx->type->builtin_block = true;
1381 break;
1382 case SpvDecorationOffset:
1383 ctx->type->offsets[member] = dec->operands[0];
1384 ctx->fields[member].offset = dec->operands[0];
1385 break;
1386 case SpvDecorationMatrixStride:
1387 /* Handled as a second pass */
1388 break;
1389 case SpvDecorationColMajor:
1390 break; /* Nothing to do here. Column-major is the default. */
1391 case SpvDecorationRowMajor:
1392 mutable_matrix_member(b, ctx->type, member)->row_major = true;
1393 break;
1394
1395 case SpvDecorationPatch:
1396 case SpvDecorationPerPrimitiveNV:
1397 case SpvDecorationPerTaskNV:
1398 case SpvDecorationPerViewNV:
1399 break;
1400
1401 case SpvDecorationSpecId:
1402 case SpvDecorationBlock:
1403 case SpvDecorationBufferBlock:
1404 case SpvDecorationArrayStride:
1405 case SpvDecorationGLSLShared:
1406 case SpvDecorationGLSLPacked:
1407 case SpvDecorationAliased:
1408 case SpvDecorationConstant:
1409 case SpvDecorationIndex:
1410 case SpvDecorationBinding:
1411 case SpvDecorationDescriptorSet:
1412 case SpvDecorationLinkageAttributes:
1413 case SpvDecorationNoContraction:
1414 case SpvDecorationInputAttachmentIndex:
1415 case SpvDecorationCPacked:
1416 vtn_warn("Decoration not allowed on struct members: %s",
1417 spirv_decoration_to_string(dec->decoration));
1418 break;
1419
1420 case SpvDecorationRestrict:
1421 /* While "Restrict" is invalid for struct members, glslang incorrectly
1422 * generates it and it ends up hiding actual driver issues in a wall of
1423 * spam from deqp-vk. Return it to the above block once the issue is
1424 * resolved. https://github.com/KhronosGroup/glslang/issues/703
1425 */
1426 break;
1427
1428 case SpvDecorationInvariant:
1429 /* Also incorrectly generated by glslang, ignore it. */
1430 break;
1431
1432 case SpvDecorationXfbBuffer:
1433 case SpvDecorationXfbStride:
1434 /* This is handled later by var_decoration_cb in vtn_variables.c */
1435 break;
1436
1437 case SpvDecorationSaturatedConversion:
1438 case SpvDecorationFuncParamAttr:
1439 case SpvDecorationFPRoundingMode:
1440 case SpvDecorationAlignment:
1441 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1442 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1443 spirv_decoration_to_string(dec->decoration));
1444 }
1445 break;
1446
1447 case SpvDecorationFPFastMathMode:
1448 /* See handle_fp_fast_math(). */
1449 break;
1450
1451 case SpvDecorationUserSemantic:
1452 case SpvDecorationUserTypeGOOGLE:
1453 /* User semantic decorations can safely be ignored by the driver. */
1454 break;
1455
1456 default:
1457 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1458 }
1459 }
1460
1461 /** Chases the array type all the way down to the tail and rewrites the
1462 * glsl_types to be based off the tail's glsl_type.
1463 */
1464 static void
1465 vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
1466 {
1467 if (type->base_type != vtn_base_type_array)
1468 return;
1469
1470 vtn_array_type_rewrite_glsl_type(type->array_element);
1471
1472 type->type = glsl_array_type(type->array_element->type,
1473 type->length, type->stride);
1474 }
1475
1476 /* Matrix strides are handled as a separate pass because we need to know
1477 * whether the matrix is row-major or not first.
1478 */
1479 static void
1480 struct_member_matrix_stride_cb(struct vtn_builder *b,
1481 UNUSED struct vtn_value *val, int member,
1482 const struct vtn_decoration *dec,
1483 void *void_ctx)
1484 {
1485 if (dec->decoration != SpvDecorationMatrixStride)
1486 return;
1487
1488 vtn_fail_if(member < 0,
1489 "The MatrixStride decoration is only allowed on members "
1490 "of OpTypeStruct");
1491 vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");
1492
1493 struct member_decoration_ctx *ctx = void_ctx;
1494
1495 struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
1496 if (mat_type->row_major) {
1497 mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
1498 mat_type->stride = mat_type->array_element->stride;
1499 mat_type->array_element->stride = dec->operands[0];
1500
1501 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1502 dec->operands[0], true);
1503 mat_type->array_element->type = glsl_get_column_type(mat_type->type);
1504 } else {
1505 vtn_assert(mat_type->array_element->stride > 0);
1506 mat_type->stride = dec->operands[0];
1507
1508 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1509 dec->operands[0], false);
1510 }
1511
1512 /* Now that we've replaced the glsl_type with a properly strided matrix
1513 * type, rewrite the member type so that it's an array of the proper kind
1514 * of glsl_type.
1515 */
1516 vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
1517 ctx->fields[member].type = ctx->type->members[member]->type;
1518 }
1519
1520 static void
1521 struct_packed_decoration_cb(struct vtn_builder *b,
1522 struct vtn_value *val, int member,
1523 const struct vtn_decoration *dec, void *void_ctx)
1524 {
1525 vtn_assert(val->type->base_type == vtn_base_type_struct);
1526 if (dec->decoration == SpvDecorationCPacked) {
1527 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1528 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1529 spirv_decoration_to_string(dec->decoration));
1530 }
1531 val->type->packed = true;
1532 }
1533 }
1534
1535 static void
1536 struct_block_decoration_cb(struct vtn_builder *b,
1537 struct vtn_value *val, int member,
1538 const struct vtn_decoration *dec, void *ctx)
1539 {
1540 if (member != -1)
1541 return;
1542
1543 struct vtn_type *type = val->type;
1544 if (dec->decoration == SpvDecorationBlock)
1545 type->block = true;
1546 else if (dec->decoration == SpvDecorationBufferBlock)
1547 type->buffer_block = true;
1548 }
1549
1550 static void
1551 type_decoration_cb(struct vtn_builder *b,
1552 struct vtn_value *val, int member,
1553 const struct vtn_decoration *dec, UNUSED void *ctx)
1554 {
1555 struct vtn_type *type = val->type;
1556
1557 if (member != -1) {
1558 /* This should have been handled by OpTypeStruct */
1559 assert(val->type->base_type == vtn_base_type_struct);
1560 assert(member >= 0 && member < val->type->length);
1561 return;
1562 }
1563
1564 switch (dec->decoration) {
1565 case SpvDecorationArrayStride:
1566 vtn_assert(type->base_type == vtn_base_type_array ||
1567 type->base_type == vtn_base_type_pointer);
1568 break;
1569 case SpvDecorationBlock:
1570 vtn_assert(type->base_type == vtn_base_type_struct);
1571 vtn_assert(type->block);
1572 break;
1573 case SpvDecorationBufferBlock:
1574 vtn_assert(type->base_type == vtn_base_type_struct);
1575 vtn_assert(type->buffer_block);
1576 break;
1577 case SpvDecorationGLSLShared:
1578 case SpvDecorationGLSLPacked:
1579 /* Ignore these, since we get explicit offsets anyways */
1580 break;
1581
1582 case SpvDecorationRowMajor:
1583 case SpvDecorationColMajor:
1584 case SpvDecorationMatrixStride:
1585 case SpvDecorationBuiltIn:
1586 case SpvDecorationNoPerspective:
1587 case SpvDecorationFlat:
1588 case SpvDecorationPatch:
1589 case SpvDecorationCentroid:
1590 case SpvDecorationSample:
1591 case SpvDecorationExplicitInterpAMD:
1592 case SpvDecorationVolatile:
1593 case SpvDecorationCoherent:
1594 case SpvDecorationNonWritable:
1595 case SpvDecorationNonReadable:
1596 case SpvDecorationUniform:
1597 case SpvDecorationUniformId:
1598 case SpvDecorationLocation:
1599 case SpvDecorationComponent:
1600 case SpvDecorationOffset:
1601 case SpvDecorationXfbBuffer:
1602 case SpvDecorationXfbStride:
1603 case SpvDecorationUserSemantic:
1604 vtn_warn("Decoration only allowed for struct members: %s",
1605 spirv_decoration_to_string(dec->decoration));
1606 break;
1607
1608 case SpvDecorationStream:
1609 /* We don't need to do anything here: the stream is filled in when
1610 * applying the decoration to a variable. Just check that, when the
1611 * decoration is not on a struct member, the target is a struct.
1612 */
1613 vtn_assert(type->base_type == vtn_base_type_struct);
1614 break;
1615
1616 case SpvDecorationRelaxedPrecision:
1617 case SpvDecorationSpecId:
1618 case SpvDecorationInvariant:
1619 case SpvDecorationRestrict:
1620 case SpvDecorationAliased:
1621 case SpvDecorationConstant:
1622 case SpvDecorationIndex:
1623 case SpvDecorationBinding:
1624 case SpvDecorationDescriptorSet:
1625 case SpvDecorationLinkageAttributes:
1626 case SpvDecorationNoContraction:
1627 case SpvDecorationInputAttachmentIndex:
1628 vtn_warn("Decoration not allowed on types: %s",
1629 spirv_decoration_to_string(dec->decoration));
1630 break;
1631
1632 case SpvDecorationCPacked:
1633 /* Handled when parsing a struct type, nothing to do here. */
1634 break;
1635
1636 case SpvDecorationSaturatedConversion:
1637 case SpvDecorationFuncParamAttr:
1638 case SpvDecorationFPRoundingMode:
1639 case SpvDecorationAlignment:
1640 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1641 spirv_decoration_to_string(dec->decoration));
1642 break;
1643
1644 case SpvDecorationFPFastMathMode:
1645 /* See handle_fp_fast_math(). */
1646 break;
1647
1648 case SpvDecorationUserTypeGOOGLE:
1649 /* User semantic decorations can safely be ignored by the driver. */
1650 break;
1651
1652 default:
1653 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1654 }
1655 }
1656
1657 static unsigned
1658 translate_image_format(struct vtn_builder *b, SpvImageFormat format)
1659 {
1660 switch (format) {
1661 case SpvImageFormatUnknown: return PIPE_FORMAT_NONE;
1662 case SpvImageFormatRgba32f: return PIPE_FORMAT_R32G32B32A32_FLOAT;
1663 case SpvImageFormatRgba16f: return PIPE_FORMAT_R16G16B16A16_FLOAT;
1664 case SpvImageFormatR32f: return PIPE_FORMAT_R32_FLOAT;
1665 case SpvImageFormatRgba8: return PIPE_FORMAT_R8G8B8A8_UNORM;
1666 case SpvImageFormatRgba8Snorm: return PIPE_FORMAT_R8G8B8A8_SNORM;
1667 case SpvImageFormatRg32f: return PIPE_FORMAT_R32G32_FLOAT;
1668 case SpvImageFormatRg16f: return PIPE_FORMAT_R16G16_FLOAT;
1669 case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
1670 case SpvImageFormatR16f: return PIPE_FORMAT_R16_FLOAT;
1671 case SpvImageFormatRgba16: return PIPE_FORMAT_R16G16B16A16_UNORM;
1672 case SpvImageFormatRgb10A2: return PIPE_FORMAT_R10G10B10A2_UNORM;
1673 case SpvImageFormatRg16: return PIPE_FORMAT_R16G16_UNORM;
1674 case SpvImageFormatRg8: return PIPE_FORMAT_R8G8_UNORM;
1675 case SpvImageFormatR16: return PIPE_FORMAT_R16_UNORM;
1676 case SpvImageFormatR8: return PIPE_FORMAT_R8_UNORM;
1677 case SpvImageFormatRgba16Snorm: return PIPE_FORMAT_R16G16B16A16_SNORM;
1678 case SpvImageFormatRg16Snorm: return PIPE_FORMAT_R16G16_SNORM;
1679 case SpvImageFormatRg8Snorm: return PIPE_FORMAT_R8G8_SNORM;
1680 case SpvImageFormatR16Snorm: return PIPE_FORMAT_R16_SNORM;
1681 case SpvImageFormatR8Snorm: return PIPE_FORMAT_R8_SNORM;
1682 case SpvImageFormatRgba32i: return PIPE_FORMAT_R32G32B32A32_SINT;
1683 case SpvImageFormatRgba16i: return PIPE_FORMAT_R16G16B16A16_SINT;
1684 case SpvImageFormatRgba8i: return PIPE_FORMAT_R8G8B8A8_SINT;
1685 case SpvImageFormatR32i: return PIPE_FORMAT_R32_SINT;
1686 case SpvImageFormatRg32i: return PIPE_FORMAT_R32G32_SINT;
1687 case SpvImageFormatRg16i: return PIPE_FORMAT_R16G16_SINT;
1688 case SpvImageFormatRg8i: return PIPE_FORMAT_R8G8_SINT;
1689 case SpvImageFormatR16i: return PIPE_FORMAT_R16_SINT;
1690 case SpvImageFormatR8i: return PIPE_FORMAT_R8_SINT;
1691 case SpvImageFormatRgba32ui: return PIPE_FORMAT_R32G32B32A32_UINT;
1692 case SpvImageFormatRgba16ui: return PIPE_FORMAT_R16G16B16A16_UINT;
1693 case SpvImageFormatRgba8ui: return PIPE_FORMAT_R8G8B8A8_UINT;
1694 case SpvImageFormatR32ui: return PIPE_FORMAT_R32_UINT;
1695 case SpvImageFormatRgb10a2ui: return PIPE_FORMAT_R10G10B10A2_UINT;
1696 case SpvImageFormatRg32ui: return PIPE_FORMAT_R32G32_UINT;
1697 case SpvImageFormatRg16ui: return PIPE_FORMAT_R16G16_UINT;
1698 case SpvImageFormatRg8ui: return PIPE_FORMAT_R8G8_UINT;
1699 case SpvImageFormatR16ui: return PIPE_FORMAT_R16_UINT;
1700 case SpvImageFormatR8ui: return PIPE_FORMAT_R8_UINT;
1701 case SpvImageFormatR64ui: return PIPE_FORMAT_R64_UINT;
1702 case SpvImageFormatR64i: return PIPE_FORMAT_R64_SINT;
1703 default:
1704 vtn_fail("Invalid image format: %s (%u)",
1705 spirv_imageformat_to_string(format), format);
1706 }
1707 }
1708
1709 static void
1710 validate_image_type_for_sampled_image(struct vtn_builder *b,
1711 const struct glsl_type *image_type,
1712 const char *operand)
1713 {
1714 /* From OpTypeSampledImage description in SPIR-V 1.6, revision 1:
1715 *
1716 * Image Type must be an OpTypeImage. It is the type of the image in the
1717 * combined sampler and image type. It must not have a Dim of
1718 * SubpassData. Additionally, starting with version 1.6, it must not have
1719 * a Dim of Buffer.
1720 *
1721 * Same also applies to the type of the Image operand in OpSampledImage.
1722 */
1723
1724 const enum glsl_sampler_dim dim = glsl_get_sampler_dim(image_type);
1725
1726 vtn_fail_if(dim == GLSL_SAMPLER_DIM_SUBPASS ||
1727 dim == GLSL_SAMPLER_DIM_SUBPASS_MS,
1728 "%s must not have a Dim of SubpassData.", operand);
1729
1730 if (dim == GLSL_SAMPLER_DIM_BUF) {
1731 if (b->version >= 0x10600) {
1732 vtn_fail("Starting with SPIR-V 1.6, %s "
1733 "must not have a Dim of Buffer.", operand);
1734 } else {
1735 vtn_warn("%s should not have a Dim of Buffer.", operand);
1736 }
1737 }
1738 }
1739
1740 static void
1741 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1742 const uint32_t *w, unsigned count)
1743 {
1744 struct vtn_value *val = NULL;
1745
1746 /* In order to properly handle forward declarations, we have to defer
1747 * allocation for pointer types.
1748 */
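 /* Illustrative example (SPIR-V assembly sketch, not from any particular
  * module): a self-referential buffer pointer is declared as
  *
  *    OpTypeForwardPointer %ptr PhysicalStorageBuffer
  *    %node = OpTypeStruct %uint %ptr
  *    %ptr  = OpTypePointer PhysicalStorageBuffer %node
  *
  * The id %ptr is used before its OpTypePointer appears, which is why the
  * vtn_type allocation is deferred for the pointer opcodes below.
  */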
1749 if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1750 val = vtn_push_value(b, w[1], vtn_value_type_type);
1751 vtn_fail_if(val->type != NULL,
1752 "Only pointers can have forward declarations");
1753 val->type = vtn_zalloc(b, struct vtn_type);
1754 val->type->id = w[1];
1755 }
1756
1757 switch (opcode) {
1758 case SpvOpTypeVoid:
1759 val->type->base_type = vtn_base_type_void;
1760 val->type->type = glsl_void_type();
1761 break;
1762 case SpvOpTypeBool:
1763 val->type->base_type = vtn_base_type_scalar;
1764 val->type->type = glsl_bool_type();
1765 val->type->length = 1;
1766 break;
1767 case SpvOpTypeInt: {
1768 int bit_size = w[2];
1769 const bool signedness = w[3];
1770 vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1771 bit_size != 32 && bit_size != 64,
1772 "Invalid int bit size: %u", bit_size);
1773 val->type->base_type = vtn_base_type_scalar;
1774 val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1775 glsl_uintN_t_type(bit_size);
1776 val->type->length = 1;
1777 break;
1778 }
1779
1780 case SpvOpTypeFloat: {
1781 int bit_size = w[2];
1782 val->type->base_type = vtn_base_type_scalar;
1783 vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1784 "Invalid float bit size: %u", bit_size);
1785 val->type->type = glsl_floatN_t_type(bit_size);
1786 val->type->length = 1;
1787 break;
1788 }
1789
1790 case SpvOpTypeVector: {
1791 struct vtn_type *base = vtn_get_type(b, w[2]);
1792 unsigned elems = w[3];
1793
1794 vtn_fail_if(base->base_type != vtn_base_type_scalar,
1795 "Base type for OpTypeVector must be a scalar");
1796 vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1797 "Invalid component count for OpTypeVector");
1798
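 /* Illustrative mapping (sketch): "%v4float = OpTypeVector %float 4" becomes
  * the GLSL vec4 type below, with length 4 and a 4-byte per-component stride.
  */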
1799 val->type->base_type = vtn_base_type_vector;
1800 val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1801 val->type->length = elems;
1802 val->type->stride = glsl_type_is_boolean(val->type->type)
1803 ? 4 : glsl_get_bit_size(base->type) / 8;
1804 val->type->array_element = base;
1805 break;
1806 }
1807
1808 case SpvOpTypeMatrix: {
1809 struct vtn_type *base = vtn_get_type(b, w[2]);
1810 unsigned columns = w[3];
1811
1812 vtn_fail_if(base->base_type != vtn_base_type_vector,
1813 "Base type for OpTypeMatrix must be a vector");
1814 vtn_fail_if(columns < 2 || columns > 4,
1815 "Invalid column count for OpTypeMatrix");
1816
1817 val->type->base_type = vtn_base_type_matrix;
1818 val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1819 glsl_get_vector_elements(base->type),
1820 columns);
1821 vtn_fail_if(glsl_type_is_error(val->type->type),
1822 "Unsupported base type for OpTypeMatrix");
1823 assert(!glsl_type_is_error(val->type->type));
1824 val->type->length = columns;
1825 val->type->array_element = base;
1826 val->type->row_major = false;
1827 val->type->stride = 0;
1828 break;
1829 }
1830
1831 case SpvOpTypeRuntimeArray:
1832 case SpvOpTypeArray: {
1833 struct vtn_type *array_element = vtn_get_type(b, w[2]);
1834
1835 if (opcode == SpvOpTypeRuntimeArray) {
1836 /* A length of 0 is used to denote unsized arrays */
1837 val->type->length = 0;
1838 } else {
1839 val->type->length = vtn_constant_uint(b, w[3]);
1840 }
1841
1842 val->type->base_type = vtn_base_type_array;
1843 val->type->array_element = array_element;
1844
1845 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1846 val->type->type = glsl_array_type(array_element->type, val->type->length,
1847 val->type->stride);
1848 break;
1849 }
1850
1851 case SpvOpTypeStruct: {
1852 unsigned num_fields = count - 2;
1853 val->type->base_type = vtn_base_type_struct;
1854 val->type->length = num_fields;
1855 val->type->members = vtn_alloc_array(b, struct vtn_type *, num_fields);
1856 val->type->offsets = vtn_alloc_array(b, unsigned, num_fields);
1857 val->type->packed = false;
1858
1859 NIR_VLA(struct glsl_struct_field, fields, count);
1860 for (unsigned i = 0; i < num_fields; i++) {
1861 val->type->members[i] = vtn_get_type(b, w[i + 2]);
1862 const char *name = NULL;
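 /* Member-name decorations are stored on the value with scope
  * VTN_DEC_STRUCT_MEMBER_NAME0 - member, so scan the decoration list for a
  * name recorded for this member (descriptive note; falls back to "fieldN").
  */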
1863 for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
1864 if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
1865 name = dec->member_name;
1866 break;
1867 }
1868 }
1869 if (!name)
1870 name = ralloc_asprintf(b, "field%d", i);
1871
1872 fields[i] = (struct glsl_struct_field) {
1873 .type = val->type->members[i]->type,
1874 .name = name,
1875 .location = -1,
1876 .offset = -1,
1877 };
1878 }
1879
1880 vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1881
1882 struct member_decoration_ctx ctx = {
1883 .num_fields = num_fields,
1884 .fields = fields,
1885 .type = val->type
1886 };
1887
1888 vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1889
1890 /* Propagate access specifiers that are present on all members to the overall type */
1891 enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1892 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1893 for (unsigned i = 0; i < num_fields; ++i)
1894 overall_access &= val->type->members[i]->access;
1895 val->type->access = overall_access;
1896
1897 vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1898
1899 vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1900
1901 const char *name = val->name;
1902
1903 if (val->type->block || val->type->buffer_block) {
1904 /* Packing will be ignored since types coming from SPIR-V are
1905 * explicitly laid out.
1906 */
1907 val->type->type = glsl_interface_type(fields, num_fields,
1908 /* packing */ 0, false,
1909 name ? name : "block");
1910 } else {
1911 val->type->type = glsl_struct_type(fields, num_fields,
1912 name ? name : "struct",
1913 val->type->packed);
1914 }
1915 break;
1916 }
1917
1918 case SpvOpTypeFunction: {
1919 val->type->base_type = vtn_base_type_function;
1920 val->type->type = NULL;
1921
1922 val->type->return_type = vtn_get_type(b, w[2]);
1923
1924 const unsigned num_params = count - 3;
1925 val->type->length = num_params;
1926 val->type->params = vtn_alloc_array(b, struct vtn_type *, num_params);
1927 for (unsigned i = 0; i < count - 3; i++) {
1928 val->type->params[i] = vtn_get_type(b, w[i + 3]);
1929 }
1930 break;
1931 }
1932
1933 case SpvOpTypePointer:
1934 case SpvOpTypeForwardPointer: {
1935 /* We can't blindly push the value because it might be a forward
1936 * declaration.
1937 */
1938 val = vtn_untyped_value(b, w[1]);
1939
1940 SpvStorageClass storage_class = w[2];
1941
1942 vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1943 b->shader->info.stage != MESA_SHADER_KERNEL &&
1944 storage_class != SpvStorageClassPhysicalStorageBuffer,
1945 "OpTypeForwardPointer is only allowed in Vulkan with "
1946 "the PhysicalStorageBuffer storage class");
1947
1948 struct vtn_type *pointed_type = NULL;
1949 if (opcode == SpvOpTypePointer)
1950 pointed_type = vtn_get_type(b, w[3]);
1951
1952 bool has_forward_pointer = false;
1953 if (val->value_type == vtn_value_type_invalid) {
1954 val->value_type = vtn_value_type_type;
1955 val->type = vtn_zalloc(b, struct vtn_type);
1956 val->type->id = w[1];
1957 val->type->base_type = vtn_base_type_pointer;
1958 val->type->storage_class = storage_class;
1959
1960 /* These can actually be stored to nir_variables and used as SSA
1961 * values so they need a real glsl_type.
1962 */
1963 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1964 b, storage_class, pointed_type, NULL);
1965
1966 /* The deref type should only matter for the UniformConstant storage
1967 * class. In particular, it should never matter for any storage
1968 * classes that are allowed in combination with OpTypeForwardPointer.
1969 */
1970 if (storage_class != SpvStorageClassUniform &&
1971 storage_class != SpvStorageClassUniformConstant) {
1972 assert(mode == vtn_storage_class_to_mode(b, storage_class,
1973 NULL, NULL));
1974 }
1975
1976 val->type->type = nir_address_format_to_glsl_type(
1977 vtn_mode_to_address_format(b, mode));
1978 } else {
1979 vtn_fail_if(val->type->storage_class != storage_class,
1980 "The storage classes of an OpTypePointer and any "
1981 "OpTypeForwardPointers that provide forward "
1982 "declarations of it must match.");
1983 has_forward_pointer = true;
1984 }
1985
1986 if (opcode == SpvOpTypePointer) {
1987 vtn_fail_if(val->type->pointed != NULL,
1988 "While OpTypeForwardPointer can be used to provide a "
1989 "forward declaration of a pointer, OpTypePointer can "
1990 "only be used once for a given id.");
1991
1992 vtn_fail_if(has_forward_pointer &&
1993 pointed_type->base_type != vtn_base_type_struct,
1994 "An OpTypePointer instruction must declare "
1995 "Pointer Type to be a pointer to an OpTypeStruct.");
1996
1997 val->type->pointed = pointed_type;
1998
1999 /* Only certain storage classes use ArrayStride. */
2000 switch (storage_class) {
2001 case SpvStorageClassWorkgroup:
2002 if (!b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR)
2003 break;
2004 FALLTHROUGH;
2005
2006 case SpvStorageClassUniform:
2007 case SpvStorageClassPushConstant:
2008 case SpvStorageClassStorageBuffer:
2009 case SpvStorageClassPhysicalStorageBuffer:
2010 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
2011 break;
2012
2013 default:
2014 /* Nothing to do. */
2015 break;
2016 }
2017 }
2018 break;
2019 }
2020
2021 case SpvOpTypeImage: {
2022 val->type->base_type = vtn_base_type_image;
2023
2024 /* Images are represented in NIR as a scalar SSA value that is the
2025 * result of a deref instruction. An OpLoad on an OpTypeImage pointer
2026 * from UniformConstant memory just takes the NIR deref from the pointer
2027 * and turns it into an SSA value.
2028 */
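 /* Illustratively, "%img = OpLoad %image_type %uniform_img_ptr" ends up
  * producing just the image deref's SSA def; no actual memory load is
  * emitted for the image handle itself (sketch of the behaviour described
  * above).
  */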
2029 val->type->type = nir_address_format_to_glsl_type(
2030 vtn_mode_to_address_format(b, vtn_variable_mode_function));
2031
2032 const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
2033 if (b->shader->info.stage == MESA_SHADER_KERNEL) {
2034 vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
2035 "Sampled type of OpTypeImage must be void for kernels");
2036 } else {
2037 vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
2038 "Sampled type of OpTypeImage must be a scalar");
2039 if (b->supported_capabilities.Int64ImageEXT) {
2040 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
2041 glsl_get_bit_size(sampled_type->type) != 64,
2042 "Sampled type of OpTypeImage must be a 32 or 64-bit "
2043 "scalar");
2044 } else {
2045 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
2046 "Sampled type of OpTypeImage must be a 32-bit scalar");
2047 }
2048 }
2049
2050 enum glsl_sampler_dim dim;
2051 switch ((SpvDim)w[3]) {
2052 case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
2053 case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
2054 case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
2055 case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
2056 case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
2057 case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
2058 case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
2059 default:
2060 vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
2061 spirv_dim_to_string((SpvDim)w[3]), w[3]);
2062 }
2063
2064 /* w[4]: as per Vulkan spec "Validation Rules within a Module",
2065 * The “Depth” operand of OpTypeImage is ignored.
2066 */
2067 bool is_array = w[5];
2068 bool multisampled = w[6];
2069 unsigned sampled = w[7];
2070 SpvImageFormat format = w[8];
2071
2072 if (count > 9)
2073 val->type->access_qualifier = w[9];
2074 else if (b->shader->info.stage == MESA_SHADER_KERNEL)
2075 /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
2076 val->type->access_qualifier = SpvAccessQualifierReadOnly;
2077 else
2078 val->type->access_qualifier = SpvAccessQualifierReadWrite;
2079
2080 if (multisampled) {
2081 if (dim == GLSL_SAMPLER_DIM_2D)
2082 dim = GLSL_SAMPLER_DIM_MS;
2083 else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
2084 dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
2085 else
2086 vtn_fail("Unsupported multisampled image type");
2087 }
2088
2089 val->type->image_format = translate_image_format(b, format);
2090
2091 enum glsl_base_type sampled_base_type =
2092 glsl_get_base_type(sampled_type->type);
2093 if (sampled == 1) {
2094 val->type->glsl_image = glsl_texture_type(dim, is_array,
2095 sampled_base_type);
2096 } else if (sampled == 2) {
2097 val->type->glsl_image = glsl_image_type(dim, is_array,
2098 sampled_base_type);
2099 } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
2100 val->type->glsl_image = glsl_image_type(dim, is_array,
2101 GLSL_TYPE_VOID);
2102 } else {
2103 vtn_fail("We need to know if the image will be sampled");
2104 }
2105 break;
2106 }
2107
2108 case SpvOpTypeSampledImage: {
2109 val->type->base_type = vtn_base_type_sampled_image;
2110 val->type->image = vtn_get_type(b, w[2]);
2111
2112 validate_image_type_for_sampled_image(
2113 b, val->type->image->glsl_image,
2114 "Image Type operand of OpTypeSampledImage");
2115
2116 /* Sampled images are represented in NIR as a vec2 SSA value where each
2117 * component is the result of a deref instruction. The first component
2118 * is the image and the second is the sampler. An OpLoad on an
2119 * OpTypeSampledImage pointer from UniformConstant memory just takes
2120 * the NIR deref from the pointer and duplicates it to both vector
2121 * components.
2122 */
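 /* Illustratively, the value behaves like
  *    uvec2(&image_deref->def, &sampler_deref->def)
  * with the component bit size taken from the function-temp address format
  * queried below (sketch, not literal code).
  */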
2123 nir_address_format addr_format =
2124 vtn_mode_to_address_format(b, vtn_variable_mode_function);
2125 assert(nir_address_format_num_components(addr_format) == 1);
2126 unsigned bit_size = nir_address_format_bit_size(addr_format);
2127 assert(bit_size == 32 || bit_size == 64);
2128
2129 enum glsl_base_type base_type =
2130 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
2131 val->type->type = glsl_vector_type(base_type, 2);
2132 break;
2133 }
2134
2135 case SpvOpTypeSampler:
2136 val->type->base_type = vtn_base_type_sampler;
2137
2138 /* Samplers are represented in NIR as a scalar SSA value that is the
2139 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer
2140 * from UniformConstant memory just takes the NIR deref from the pointer
2141 * and turns it into an SSA value.
2142 */
2143 val->type->type = nir_address_format_to_glsl_type(
2144 vtn_mode_to_address_format(b, vtn_variable_mode_function));
2145 break;
2146
2147 case SpvOpTypeAccelerationStructureKHR:
2148 val->type->base_type = vtn_base_type_accel_struct;
2149 val->type->type = glsl_uint64_t_type();
2150 break;
2151
2152
2153 case SpvOpTypeOpaque: {
2154 val->type->base_type = vtn_base_type_struct;
2155 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
2156 val->type->type = glsl_struct_type(NULL, 0, name, false);
2157 break;
2158 }
2159
2160 case SpvOpTypeRayQueryKHR: {
2161 val->type->base_type = vtn_base_type_ray_query;
2162 val->type->type = glsl_uint64_t_type();
2163 /* We may need to run queries on helper invocations. Here the parser
2164 * doesn't go through a deeper analysis on whether the result of a query
2165 * will be used in derivative instructions.
2166 *
2167 * An implementation willing to optimize this would look through the IR
2168 * and check if any derivative instruction uses the result of a query
2169 * and drop this flag if not.
2170 */
2171 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
2172 val->type->access = ACCESS_INCLUDE_HELPERS;
2173 break;
2174 }
2175
2176 case SpvOpTypeCooperativeMatrixKHR:
2177 vtn_handle_cooperative_type(b, val, opcode, w, count);
2178 break;
2179
2180 case SpvOpTypeEvent:
2181 val->type->base_type = vtn_base_type_event;
2182 /*
2183 * This makes the event type compatible with the pointer size because of LLVM 16.
2184 * LLVM 17 fixes this properly, but with LLVM 16 and opaque pointers it is still wrong.
2185 */
2186 val->type->type = b->shader->info.cs.ptr_size == 64 ? glsl_int64_t_type() : glsl_int_type();
2187 break;
2188
2189 case SpvOpTypeDeviceEvent:
2190 case SpvOpTypeReserveId:
2191 case SpvOpTypeQueue:
2192 case SpvOpTypePipe:
2193 default:
2194 vtn_fail_with_opcode("Unhandled opcode", opcode);
2195 }
2196
2197 vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
2198
2199 if (val->type->base_type == vtn_base_type_struct &&
2200 (val->type->block || val->type->buffer_block)) {
2201 for (unsigned i = 0; i < val->type->length; i++) {
2202 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
2203 "Block and BufferBlock decorations cannot decorate a "
2204 "structure type that is nested at any level inside "
2205 "another structure type decorated with Block or "
2206 "BufferBlock.");
2207 }
2208 }
2209 }
2210
2211 static nir_constant *
2212 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
2213 {
2214 nir_constant *c = rzalloc(b, nir_constant);
2215
2216 switch (type->base_type) {
2217 case vtn_base_type_scalar:
2218 case vtn_base_type_vector:
2219 c->is_null_constant = true;
2220 /* Nothing to do here. It's already initialized to zero */
2221 break;
2222
2223 case vtn_base_type_pointer: {
2224 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
2225 b, type->storage_class, type->pointed, NULL);
2226 nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
2227
2228 const nir_const_value *null_value = nir_address_format_null_value(addr_format);
2229 memcpy(c->values, null_value,
2230 sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
2231 break;
2232 }
2233
2234 case vtn_base_type_void:
2235 case vtn_base_type_image:
2236 case vtn_base_type_sampler:
2237 case vtn_base_type_sampled_image:
2238 case vtn_base_type_function:
2239 case vtn_base_type_event:
2240 /* For those we have to return something but it doesn't matter what. */
2241 break;
2242
2243 case vtn_base_type_matrix:
2244 case vtn_base_type_array:
2245 vtn_assert(type->length > 0);
2246 c->is_null_constant = true;
2247 c->num_elements = type->length;
2248 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2249
2250 c->elements[0] = vtn_null_constant(b, type->array_element);
2251 for (unsigned i = 1; i < c->num_elements; i++)
2252 c->elements[i] = c->elements[0];
2253 break;
2254
2255 case vtn_base_type_struct:
2256 c->is_null_constant = true;
2257 c->num_elements = type->length;
2258 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
2259 for (unsigned i = 0; i < c->num_elements; i++)
2260 c->elements[i] = vtn_null_constant(b, type->members[i]);
2261 break;
2262
2263 default:
2264 vtn_fail("Invalid type for null constant");
2265 }
2266
2267 return c;
2268 }
2269
2270 static void
2271 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
2272 ASSERTED int member,
2273 const struct vtn_decoration *dec, void *data)
2274 {
2275 vtn_assert(member == -1);
2276 if (dec->decoration != SpvDecorationSpecId)
2277 return;
2278
2279 nir_const_value *value = data;
2280 for (unsigned i = 0; i < b->num_specializations; i++) {
2281 if (b->specializations[i].id == dec->operands[0]) {
2282 *value = b->specializations[i].value;
2283 return;
2284 }
2285 }
2286 }
2287
2288 static void
2289 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
2290 struct vtn_value *val,
2291 ASSERTED int member,
2292 const struct vtn_decoration *dec,
2293 UNUSED void *data)
2294 {
2295 vtn_assert(member == -1);
2296 if (dec->decoration != SpvDecorationBuiltIn ||
2297 dec->operands[0] != SpvBuiltInWorkgroupSize)
2298 return;
2299
2300 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
2301 b->workgroup_size_builtin = val;
2302 }
2303
2304 static void
2305 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
2306 const uint32_t *w, unsigned count)
2307 {
2308 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
2309 val->constant = rzalloc(b, nir_constant);
2310 switch (opcode) {
2311 case SpvOpConstantTrue:
2312 case SpvOpConstantFalse:
2313 case SpvOpSpecConstantTrue:
2314 case SpvOpSpecConstantFalse: {
2315 vtn_fail_if(val->type->type != glsl_bool_type(),
2316 "Result type of %s must be OpTypeBool",
2317 spirv_op_to_string(opcode));
2318
2319 bool bval = (opcode == SpvOpConstantTrue ||
2320 opcode == SpvOpSpecConstantTrue);
2321
2322 nir_const_value u32val = nir_const_value_for_uint(bval, 32);
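 /* Specializations are matched by SpecId and applied as raw 32-bit words, so
  * route the boolean through a u32 value and convert back afterwards
  * (descriptive note).
  */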
2323
2324 if (opcode == SpvOpSpecConstantTrue ||
2325 opcode == SpvOpSpecConstantFalse)
2326 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
2327
2328 val->constant->values[0].b = u32val.u32 != 0;
2329 break;
2330 }
2331
2332 case SpvOpConstant:
2333 case SpvOpSpecConstant: {
2334 vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
2335 "Result type of %s must be a scalar",
2336 spirv_op_to_string(opcode));
2337 int bit_size = glsl_get_bit_size(val->type->type);
2338 switch (bit_size) {
2339 case 64:
2340 val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
2341 break;
2342 case 32:
2343 val->constant->values[0].u32 = w[3];
2344 break;
2345 case 16:
2346 val->constant->values[0].u16 = w[3];
2347 break;
2348 case 8:
2349 val->constant->values[0].u8 = w[3];
2350 break;
2351 default:
2352 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
2353 }
2354
2355 if (opcode == SpvOpSpecConstant)
2356 vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
2357 &val->constant->values[0]);
2358 break;
2359 }
2360
2361 case SpvOpSpecConstantComposite:
2362 case SpvOpConstantComposite:
2363 case SpvOpConstantCompositeReplicateEXT:
2364 case SpvOpSpecConstantCompositeReplicateEXT: {
2365 const unsigned elem_count =
2366 val->type->base_type == vtn_base_type_cooperative_matrix ?
2367 1 : val->type->length;
2368
2369 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
2370 if (opcode == SpvOpConstantCompositeReplicateEXT ||
2371 opcode == SpvOpSpecConstantCompositeReplicateEXT) {
2372 struct vtn_value *elem_val = vtn_untyped_value(b, w[3]);
2373
2374 if (elem_val->value_type == vtn_value_type_constant) {
2375 elems[0] = elem_val->constant;
2376 val->is_undef_constant = false;
2377 } else {
2378 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2379 "only constants or undefs allowed for %s",
2380 spirv_op_to_string(opcode));
2381 /* to make it easier, just insert a NULL constant for now */
2382 elems[0] = vtn_null_constant(b, elem_val->type);
2383 val->is_undef_constant = true;
2384 }
2385
2386 for (unsigned i = 1; i < elem_count; i++)
2387 elems[i] = elems[0];
2388 } else {
2389 vtn_fail_if(elem_count != count - 3,
2390 "%s has %u constituents, expected %u",
2391 spirv_op_to_string(opcode), count - 3, elem_count);
2392
2393 val->is_undef_constant = true;
2394 for (unsigned i = 0; i < elem_count; i++) {
2395 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
2396
2397 if (elem_val->value_type == vtn_value_type_constant) {
2398 elems[i] = elem_val->constant;
2399 val->is_undef_constant = val->is_undef_constant &&
2400 elem_val->is_undef_constant;
2401 } else {
2402 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2403 "only constants or undefs allowed for %s",
2404 spirv_op_to_string(opcode));
2405 /* to make it easier, just insert a NULL constant for now */
2406 elems[i] = vtn_null_constant(b, elem_val->type);
2407 }
2408 }
2409 }
2410
2411 switch (val->type->base_type) {
2412 case vtn_base_type_vector: {
2413 assert(glsl_type_is_vector(val->type->type));
2414 for (unsigned i = 0; i < elem_count; i++)
2415 val->constant->values[i] = elems[i]->values[0];
2416 break;
2417 }
2418
2419 case vtn_base_type_matrix:
2420 case vtn_base_type_struct:
2421 case vtn_base_type_array:
2422 ralloc_steal(val->constant, elems);
2423 val->constant->num_elements = elem_count;
2424 val->constant->elements = elems;
2425 break;
2426
2427 case vtn_base_type_cooperative_matrix:
2428 val->constant->values[0] = elems[0]->values[0];
2429 break;
2430
2431 default:
2432 vtn_fail("Result type of %s must be a composite type",
2433 spirv_op_to_string(opcode));
2434 }
2435 break;
2436 }
2437
2438 case SpvOpSpecConstantOp: {
2439 nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2440 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2441 SpvOp opcode = u32op.u32;
2442 switch (opcode) {
2443 case SpvOpVectorShuffle: {
2444 struct vtn_value *v0 = &b->values[w[4]];
2445 struct vtn_value *v1 = &b->values[w[5]];
2446
2447 vtn_assert(v0->value_type == vtn_value_type_constant ||
2448 v0->value_type == vtn_value_type_undef);
2449 vtn_assert(v1->value_type == vtn_value_type_constant ||
2450 v1->value_type == vtn_value_type_undef);
2451
2452 unsigned len0 = glsl_get_vector_elements(v0->type->type);
2453 unsigned len1 = glsl_get_vector_elements(v1->type->type);
2454
2455 vtn_assert(len0 + len1 < 16);
2456
2457 unsigned bit_size = glsl_get_bit_size(val->type->type);
2458 unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2459 unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2460
2461 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2462 (void)bit_size0; (void)bit_size1;
2463
2464 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2465 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2466
2467 if (v0->value_type == vtn_value_type_constant) {
2468 for (unsigned i = 0; i < len0; i++)
2469 combined[i] = v0->constant->values[i];
2470 }
2471 if (v1->value_type == vtn_value_type_constant) {
2472 for (unsigned i = 0; i < len1; i++)
2473 combined[len0 + i] = v1->constant->values[i];
2474 }
2475
2476 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2477 uint32_t comp = w[i + 6];
2478 if (comp == (uint32_t)-1) {
2479 /* If the component is not used, set the value to a known constant
2480 * so that any wrong use of it can be detected.
2481 */
2482 val->constant->values[j] = undef;
2483 } else {
2484 vtn_fail_if(comp >= len0 + len1,
2485 "All Component literals must either be FFFFFFFF "
2486 "or in [0, N - 1] (inclusive).");
2487 val->constant->values[j] = combined[comp];
2488 }
2489 }
2490 break;
2491 }
2492
2493 case SpvOpCompositeExtract:
2494 case SpvOpCompositeInsert: {
2495 struct vtn_value *comp;
2496 unsigned deref_start;
2497 struct nir_constant **c;
2498 if (opcode == SpvOpCompositeExtract) {
2499 comp = vtn_value(b, w[4], vtn_value_type_constant);
2500 deref_start = 5;
2501 c = &comp->constant;
2502 } else {
2503 comp = vtn_value(b, w[5], vtn_value_type_constant);
2504 deref_start = 6;
2505 val->constant = nir_constant_clone(comp->constant,
2506 (nir_variable *)b);
2507 c = &val->constant;
2508 }
2509
2510 int elem = -1;
2511 const struct vtn_type *type = comp->type;
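 /* elem stays -1 unless the final index selects a vector component; in that
  * case the per-component copy paths below are used instead of swapping the
  * whole nir_constant pointed to by c (descriptive note).
  */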
2512 for (unsigned i = deref_start; i < count; i++) {
2513 if (type->base_type == vtn_base_type_cooperative_matrix) {
2514 /* Cooperative matrices are always scalar constants. We don't
2515 * care about the index w[i] because it's always replicated.
2516 */
2517 type = type->component_type;
2518 } else {
2519 vtn_fail_if(w[i] > type->length,
2520 "%uth index of %s is %u but the type has only "
2521 "%u elements", i - deref_start,
2522 spirv_op_to_string(opcode), w[i], type->length);
2523
2524 switch (type->base_type) {
2525 case vtn_base_type_vector:
2526 elem = w[i];
2527 type = type->array_element;
2528 break;
2529
2530 case vtn_base_type_matrix:
2531 case vtn_base_type_array:
2532 c = &(*c)->elements[w[i]];
2533 type = type->array_element;
2534 break;
2535
2536 case vtn_base_type_struct:
2537 c = &(*c)->elements[w[i]];
2538 type = type->members[w[i]];
2539 break;
2540
2541 default:
2542 vtn_fail("%s must only index into composite types",
2543 spirv_op_to_string(opcode));
2544 }
2545 }
2546 }
2547
2548 if (opcode == SpvOpCompositeExtract) {
2549 if (elem == -1) {
2550 val->constant = *c;
2551 } else {
2552 unsigned num_components = type->length;
2553 for (unsigned i = 0; i < num_components; i++)
2554 val->constant->values[i] = (*c)->values[elem + i];
2555 }
2556 } else {
2557 struct vtn_value *insert =
2558 vtn_value(b, w[4], vtn_value_type_constant);
2559 vtn_assert(insert->type == type);
2560 if (elem == -1) {
2561 *c = insert->constant;
2562 } else {
2563 unsigned num_components = type->length;
2564 for (unsigned i = 0; i < num_components; i++)
2565 (*c)->values[elem + i] = insert->constant->values[i];
2566 }
2567 }
2568 break;
2569 }
2570
2571 default: {
2572 bool swap;
2573 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2574 nir_alu_type src_alu_type = dst_alu_type;
2575 unsigned num_components = glsl_get_vector_elements(val->type->type);
2576 unsigned bit_size;
2577
2578 vtn_assert(count <= 7);
2579
2580 switch (opcode) {
2581 case SpvOpSConvert:
2582 case SpvOpFConvert:
2583 case SpvOpUConvert:
2584 /* We have a source in a conversion */
2585 src_alu_type =
2586 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2587 /* We use the bitsize of the conversion source to evaluate the opcode later */
2588 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2589 break;
2590 default:
2591 bit_size = glsl_get_bit_size(val->type->type);
2592 };
2593
2594 bool exact;
2595 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2596 nir_alu_type_get_type_size(src_alu_type),
2597 nir_alu_type_get_type_size(dst_alu_type));
2598
2599 /* No SPIR-V opcodes handled through this path should set exact.
2600 * Since it is ignored, assert on it.
2601 */
2602 assert(!exact);
2603
2604 nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2605
2606 for (unsigned i = 0; i < count - 4; i++) {
2607 struct vtn_value *src_val =
2608 vtn_value(b, w[4 + i], vtn_value_type_constant);
2609
2610 /* If this is an unsized source, pull the bit size from the
2611 * source; otherwise, we'll use the bit size from the destination.
2612 */
2613 if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2614 bit_size = glsl_get_bit_size(src_val->type->type);
2615
2616 unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2617 nir_op_infos[op].input_sizes[i] :
2618 num_components;
2619
2620 unsigned j = swap ? 1 - i : i;
2621 for (unsigned c = 0; c < src_comps; c++)
2622 src[j][c] = src_val->constant->values[c];
2623 }
2624
2625 /* fix up fixed size sources */
2626 switch (op) {
2627 case nir_op_ishl:
2628 case nir_op_ishr:
2629 case nir_op_ushr: {
2630 if (bit_size == 32)
2631 break;
2632 for (unsigned i = 0; i < num_components; ++i) {
2633 switch (bit_size) {
2634 case 64: src[1][i].u32 = src[1][i].u64; break;
2635 case 16: src[1][i].u32 = src[1][i].u16; break;
2636 case 8: src[1][i].u32 = src[1][i].u8; break;
2637 }
2638 }
2639 break;
2640 }
2641 default:
2642 break;
2643 }
2644
2645 nir_const_value *srcs[3] = {
2646 src[0], src[1], src[2],
2647 };
2648 nir_eval_const_opcode(op, val->constant->values,
2649 num_components, bit_size, srcs,
2650 b->shader->info.float_controls_execution_mode);
2651 break;
2652 } /* default */
2653 }
2654 break;
2655 }
2656
2657 case SpvOpConstantNull:
2658 val->constant = vtn_null_constant(b, val->type);
2659 val->is_null_constant = true;
2660 break;
2661
2662 default:
2663 vtn_fail_with_opcode("Unhandled opcode", opcode);
2664 }
2665
2666 /* Now that we have the value, update the workgroup size if needed */
2667 if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2668 vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2669 NULL);
2670 }
2671
2672 static void
2673 vtn_split_barrier_semantics(struct vtn_builder *b,
2674 SpvMemorySemanticsMask semantics,
2675 SpvMemorySemanticsMask *before,
2676 SpvMemorySemanticsMask *after)
2677 {
2678 /* For memory semantics embedded in operations, we split them into up to
2679 * two barriers, to be added before and after the operation. This is less
2680 * strict than if we propagated them until the final backend stage, but it
2681 * still results in correct execution.
2682 *
2683 * A further improvement could be to pipe this information (and use it!) into the
2684 * next compiler layers, at the expense of making the handling of barriers
2685 * more complicated.
2686 */
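 /* Illustrative example: an atomic carrying
  *    AcquireRelease | UniformMemory
  * semantics is split into a Release barrier (with the UniformMemory storage
  * semantics) before the operation and an Acquire barrier (with the same
  * storage semantics) after it.
  */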
2687
2688 *before = SpvMemorySemanticsMaskNone;
2689 *after = SpvMemorySemanticsMaskNone;
2690
2691 SpvMemorySemanticsMask order_semantics =
2692 semantics & (SpvMemorySemanticsAcquireMask |
2693 SpvMemorySemanticsReleaseMask |
2694 SpvMemorySemanticsAcquireReleaseMask |
2695 SpvMemorySemanticsSequentiallyConsistentMask);
2696
2697 if (util_bitcount(order_semantics) > 1) {
2698 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2699 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2700 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2701 */
2702 vtn_warn("Multiple memory ordering semantics specified, "
2703 "assuming AcquireRelease.");
2704 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2705 }
2706
2707 const SpvMemorySemanticsMask av_vis_semantics =
2708 semantics & (SpvMemorySemanticsMakeAvailableMask |
2709 SpvMemorySemanticsMakeVisibleMask);
2710
2711 const SpvMemorySemanticsMask storage_semantics =
2712 semantics & (SpvMemorySemanticsUniformMemoryMask |
2713 SpvMemorySemanticsSubgroupMemoryMask |
2714 SpvMemorySemanticsWorkgroupMemoryMask |
2715 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2716 SpvMemorySemanticsAtomicCounterMemoryMask |
2717 SpvMemorySemanticsImageMemoryMask |
2718 SpvMemorySemanticsOutputMemoryMask);
2719
2720 const SpvMemorySemanticsMask other_semantics =
2721 semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2722 SpvMemorySemanticsVolatileMask);
2723
2724 if (other_semantics)
2725 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2726
2727 /* SequentiallyConsistent is treated as AcquireRelease. */
2728
2729 /* The RELEASE barrier happens BEFORE the operation, and it is usually
2730 * associated with a Store. All the write operations with a matching
2731 * semantics will not be reordered after the Store.
2732 */
2733 if (order_semantics & (SpvMemorySemanticsReleaseMask |
2734 SpvMemorySemanticsAcquireReleaseMask |
2735 SpvMemorySemanticsSequentiallyConsistentMask)) {
2736 *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2737 }
2738
2739 /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2740 * associated with a Load. All the operations with a matching semantics
2741 * will not be reordered before the Load.
2742 */
2743 if (order_semantics & (SpvMemorySemanticsAcquireMask |
2744 SpvMemorySemanticsAcquireReleaseMask |
2745 SpvMemorySemanticsSequentiallyConsistentMask)) {
2746 *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2747 }
2748
2749 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2750 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2751
2752 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2753 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2754 }
2755
2756 static nir_memory_semantics
2757 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2758 SpvMemorySemanticsMask semantics)
2759 {
2760 nir_memory_semantics nir_semantics = 0;
2761
2762 SpvMemorySemanticsMask order_semantics =
2763 semantics & (SpvMemorySemanticsAcquireMask |
2764 SpvMemorySemanticsReleaseMask |
2765 SpvMemorySemanticsAcquireReleaseMask |
2766 SpvMemorySemanticsSequentiallyConsistentMask);
2767
2768 if (util_bitcount(order_semantics) > 1) {
2769 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2770 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2771 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2772 */
2773 vtn_warn("Multiple memory ordering semantics bits specified, "
2774 "assuming AcquireRelease.");
2775 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2776 }
2777
2778 switch (order_semantics) {
2779 case 0:
2780 /* Not an ordering barrier. */
2781 break;
2782
2783 case SpvMemorySemanticsAcquireMask:
2784 nir_semantics = NIR_MEMORY_ACQUIRE;
2785 break;
2786
2787 case SpvMemorySemanticsReleaseMask:
2788 nir_semantics = NIR_MEMORY_RELEASE;
2789 break;
2790
2791 case SpvMemorySemanticsSequentiallyConsistentMask:
2792 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2793 case SpvMemorySemanticsAcquireReleaseMask:
2794 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2795 break;
2796
2797 default:
2798 unreachable("Invalid memory order semantics");
2799 }
2800
2801 if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2802 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2803 "To use MakeAvailable memory semantics the VulkanMemoryModel "
2804 "capability must be declared.");
2805 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2806 }
2807
2808 if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2809 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2810 "To use MakeVisible memory semantics the VulkanMemoryModel "
2811 "capability must be declared.");
2812 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2813 }
2814
2815 return nir_semantics;
2816 }
2817
2818 static nir_variable_mode
2819 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2820 SpvMemorySemanticsMask semantics)
2821 {
2822 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2823 * and AtomicCounterMemory are ignored".
2824 */
2825 if (b->options->environment == NIR_SPIRV_VULKAN) {
2826 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2827 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2828 SpvMemorySemanticsAtomicCounterMemoryMask);
2829 }
2830
2831 nir_variable_mode modes = 0;
2832 if (semantics & SpvMemorySemanticsUniformMemoryMask)
2833 modes |= nir_var_mem_ssbo | nir_var_mem_global;
2834 if (semantics & SpvMemorySemanticsImageMemoryMask)
2835 modes |= nir_var_image;
2836 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2837 modes |= nir_var_mem_shared;
2838 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2839 modes |= nir_var_mem_global;
2840 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2841 modes |= nir_var_shader_out;
2842
2843 if (b->shader->info.stage == MESA_SHADER_TASK)
2844 modes |= nir_var_mem_task_payload;
2845 }
2846
2847 if (semantics & SpvMemorySemanticsAtomicCounterMemoryMask) {
2848 /* There's no nir_var_atomic_counter, but since atomic counters are
2849 * lowered to SSBOs, we use nir_var_mem_ssbo instead.
2850 */
2851 modes |= nir_var_mem_ssbo;
2852 }
2853
2854 return modes;
2855 }
2856
2857 mesa_scope
2858 vtn_translate_scope(struct vtn_builder *b, SpvScope scope)
2859 {
2860 switch (scope) {
2861 case SpvScopeDevice:
2862 vtn_fail_if(b->supported_capabilities.VulkanMemoryModel &&
2863 !b->supported_capabilities.VulkanMemoryModelDeviceScope,
2864 "If the Vulkan memory model is declared and any instruction "
2865 "uses Device scope, the VulkanMemoryModelDeviceScope "
2866 "capability must be declared.");
2867 return SCOPE_DEVICE;
2868
2869 case SpvScopeQueueFamily:
2870 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
2871 "To use Queue Family scope, the VulkanMemoryModel capability "
2872 "must be declared.");
2873 return SCOPE_QUEUE_FAMILY;
2874
2875 case SpvScopeWorkgroup:
2876 return SCOPE_WORKGROUP;
2877
2878 case SpvScopeSubgroup:
2879 return SCOPE_SUBGROUP;
2880
2881 case SpvScopeInvocation:
2882 return SCOPE_INVOCATION;
2883
2884 case SpvScopeShaderCallKHR:
2885 return SCOPE_SHADER_CALL;
2886
2887 default:
2888 vtn_fail("Invalid memory scope");
2889 }
2890 }
2891
2892 static void
2893 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2894 SpvScope mem_scope,
2895 SpvMemorySemanticsMask semantics)
2896 {
2897 nir_memory_semantics nir_semantics =
2898 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2899 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2900 mesa_scope nir_exec_scope = vtn_translate_scope(b, exec_scope);
2901
2902 /* Memory semantics is optional for OpControlBarrier. */
2903 mesa_scope nir_mem_scope;
2904 if (nir_semantics == 0 || modes == 0)
2905 nir_mem_scope = SCOPE_NONE;
2906 else
2907 nir_mem_scope = vtn_translate_scope(b, mem_scope);
2908
2909 nir_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2910 .memory_semantics=nir_semantics, .memory_modes=modes);
2911 }
2912
2913 void
2914 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
2915 SpvMemorySemanticsMask semantics)
2916 {
2917 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2918 nir_memory_semantics nir_semantics =
2919 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2920
2921 /* No barrier to add. */
2922 if (nir_semantics == 0 || modes == 0)
2923 return;
2924
2925 nir_barrier(&b->nb, .memory_scope=vtn_translate_scope(b, scope),
2926 .memory_semantics=nir_semantics,
2927 .memory_modes=modes);
2928 }
2929
2930 struct vtn_ssa_value *
2931 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2932 {
2933 /* Always use bare types for SSA values for a couple of reasons:
2934 *
2935 * 1. Code which emits deref chains should never listen to the explicit
2936 * layout information on the SSA value if any exists. If we've
2937 * accidentally been relying on this, we want to find those bugs.
2938 *
2939 * 2. We want to be able to quickly check that an SSA value being assigned
2940 * to a SPIR-V value has the right type. Using bare types everywhere
2941 * ensures that we can pointer-compare.
2942 */
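 /* For example (sketch), a mat4 member carrying an explicit row-major/stride
  * layout and a plain mat4 both map to the same bare glsl_type here, so the
  * pointer comparison mentioned in (2) stays valid.
  */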
2943 struct vtn_ssa_value *val = vtn_zalloc(b, struct vtn_ssa_value);
2944 val->type = glsl_get_bare_type(type);
2945
2946
2947 if (!glsl_type_is_vector_or_scalar(type)) {
2948 unsigned elems = glsl_get_length(val->type);
2949 val->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
2950 if (glsl_type_is_array_or_matrix(type) || glsl_type_is_cmat(type)) {
2951 const struct glsl_type *elem_type = glsl_get_array_element(type);
2952 for (unsigned i = 0; i < elems; i++)
2953 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2954 } else {
2955 vtn_assert(glsl_type_is_struct_or_ifc(type));
2956 for (unsigned i = 0; i < elems; i++) {
2957 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2958 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2959 }
2960 }
2961 }
2962
2963 return val;
2964 }
2965
2966 void
2967 vtn_set_ssa_value_var(struct vtn_builder *b, struct vtn_ssa_value *ssa, nir_variable *var)
2968 {
2969 vtn_assert(glsl_type_is_cmat(var->type));
2970 vtn_assert(var->type == ssa->type);
2971 ssa->is_variable = true;
2972 ssa->var = var;
2973 }
2974
2975 static nir_tex_src
2976 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2977 {
2978 return nir_tex_src_for_ssa(type, vtn_get_nir_ssa(b, index));
2979 }
2980
2981 static uint32_t
2982 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2983 uint32_t mask_idx, SpvImageOperandsMask op)
2984 {
2985 static const SpvImageOperandsMask ops_with_arg =
2986 SpvImageOperandsBiasMask |
2987 SpvImageOperandsLodMask |
2988 SpvImageOperandsGradMask |
2989 SpvImageOperandsConstOffsetMask |
2990 SpvImageOperandsOffsetMask |
2991 SpvImageOperandsConstOffsetsMask |
2992 SpvImageOperandsSampleMask |
2993 SpvImageOperandsMinLodMask |
2994 SpvImageOperandsMakeTexelAvailableMask |
2995 SpvImageOperandsMakeTexelVisibleMask;
2996
2997 assert(util_bitcount(op) == 1);
2998 assert(w[mask_idx] & op);
2999 assert(op & ops_with_arg);
3000
3001 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
3002
3003 /* Adjust indices for operands with two arguments. */
3004 static const SpvImageOperandsMask ops_with_two_args =
3005 SpvImageOperandsGradMask;
3006 idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
3007
3008 idx += mask_idx;
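 /* Worked example (illustrative): with an ImageOperands mask of
  * Bias | ConstOffset and op == ConstOffset, only Bias precedes it among
  * ops_with_arg, so idx = 1 + 1 relative to the mask word; adding mask_idx
  * gives the absolute index of the ConstOffset argument in w[].
  */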
3009
3010 vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
3011 "Image op claims to have %s but does not enough "
3012 "following operands", spirv_imageoperands_to_string(op));
3013
3014 return idx;
3015 }
3016
3017 static void
3018 non_uniform_decoration_cb(struct vtn_builder *b,
3019 struct vtn_value *val, int member,
3020 const struct vtn_decoration *dec, void *void_ctx)
3021 {
3022 enum gl_access_qualifier *access = void_ctx;
3023 switch (dec->decoration) {
3024 case SpvDecorationNonUniformEXT:
3025 *access |= ACCESS_NON_UNIFORM;
3026 break;
3027
3028 default:
3029 break;
3030 }
3031 }
3032
3033 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
3034 * image read/sample operations and source type for write operations.
3035 */
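 /* For instance (sketch), a read from an image whose sampled type is 32-bit
  * uint, performed with the SignExtend image operand, is treated as producing
  * nir_type_int32 texels by the helper below.
  */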
3036 static nir_alu_type
3037 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
3038 {
3039 unsigned extend_operands =
3040 operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
3041 vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
3042 "SignExtend/ZeroExtend used on floating-point texel type");
3043 vtn_fail_if(extend_operands ==
3044 (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
3045 "SignExtend and ZeroExtend both specified");
3046
3047 if (operands & SpvImageOperandsSignExtendMask)
3048 return nir_type_int | nir_alu_type_get_type_size(type);
3049 if (operands & SpvImageOperandsZeroExtendMask)
3050 return nir_type_uint | nir_alu_type_get_type_size(type);
3051
3052 return type;
3053 }
3054
3055 static void
3056 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
3057 const uint32_t *w, unsigned count)
3058 {
3059 if (opcode == SpvOpSampledImage) {
3060 struct vtn_sampled_image si = {
3061 .image = vtn_get_image(b, w[3], NULL),
3062 .sampler = vtn_get_sampler(b, w[4]),
3063 };
3064
3065 validate_image_type_for_sampled_image(
3066 b, si.image->type,
3067 "Type of Image operand of OpSampledImage");
3068
3069 enum gl_access_qualifier access = 0;
3070 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
3071 non_uniform_decoration_cb, &access);
3072 vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
3073 non_uniform_decoration_cb, &access);
3074
3075 vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
3076 return;
3077 } else if (opcode == SpvOpImage) {
3078 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
3079
3080 enum gl_access_qualifier access = 0;
3081 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
3082 non_uniform_decoration_cb, &access);
3083
3084 vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
3085 return;
3086 } else if (opcode == SpvOpImageSparseTexelsResident) {
3087 nir_def *code = vtn_get_nir_ssa(b, w[3]);
3088 vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, 1, code));
3089 return;
3090 }
3091
3092 nir_deref_instr *image = NULL, *sampler = NULL;
3093 struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
3094 if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
3095 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
3096 image = si.image;
3097 sampler = si.sampler;
3098 } else {
3099 image = vtn_get_image(b, w[3], NULL);
3100 }
3101
3102 const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
3103 const bool is_array = glsl_sampler_type_is_array(image->type);
3104 nir_alu_type dest_type = nir_type_invalid;
3105
3106 /* Figure out the base texture operation */
3107 nir_texop texop;
3108 switch (opcode) {
3109 case SpvOpImageSampleImplicitLod:
3110 case SpvOpImageSparseSampleImplicitLod:
3111 case SpvOpImageSampleDrefImplicitLod:
3112 case SpvOpImageSparseSampleDrefImplicitLod:
3113 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
3114 sampler_dim != GLSL_SAMPLER_DIM_MS &&
3115 sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
3116 texop = nir_texop_tex;
3117 break;
3118
3119 case SpvOpImageSampleProjImplicitLod:
3120 case SpvOpImageSampleProjDrefImplicitLod:
3121 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3122 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3123 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3124 sampler_dim == GLSL_SAMPLER_DIM_RECT);
3125 vtn_assert(!is_array);
3126 texop = nir_texop_tex;
3127 break;
3128
3129 case SpvOpImageSampleExplicitLod:
3130 case SpvOpImageSparseSampleExplicitLod:
3131 case SpvOpImageSampleDrefExplicitLod:
3132 case SpvOpImageSparseSampleDrefExplicitLod:
3133 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_BUF &&
3134 sampler_dim != GLSL_SAMPLER_DIM_MS &&
3135 sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS);
3136 texop = nir_texop_txl;
3137 break;
3138
3139 case SpvOpImageSampleProjExplicitLod:
3140 case SpvOpImageSampleProjDrefExplicitLod:
3141 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3142 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3143 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3144 sampler_dim == GLSL_SAMPLER_DIM_RECT);
3145 vtn_assert(!is_array);
3146 texop = nir_texop_txl;
3147 break;
3148
3149 case SpvOpImageFetch:
3150 case SpvOpImageSparseFetch:
3151 vtn_assert(sampler_dim != GLSL_SAMPLER_DIM_CUBE);
3152 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
3153 texop = nir_texop_txf_ms;
3154 } else {
3155 texop = nir_texop_txf;
3156 }
3157 break;
3158
3159 case SpvOpImageGather:
3160 case SpvOpImageSparseGather:
3161 case SpvOpImageDrefGather:
3162 case SpvOpImageSparseDrefGather:
3163 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_2D ||
3164 sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
3165 sampler_dim == GLSL_SAMPLER_DIM_RECT);
3166 texop = nir_texop_tg4;
3167 break;
3168
3169 case SpvOpImageQuerySizeLod:
3170 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3171 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3172 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3173 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3174 texop = nir_texop_txs;
3175 dest_type = nir_type_int32;
3176 break;
3177
3178 case SpvOpImageQuerySize:
3179 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3180 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3181 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3182 sampler_dim == GLSL_SAMPLER_DIM_CUBE ||
3183 sampler_dim == GLSL_SAMPLER_DIM_RECT ||
3184 sampler_dim == GLSL_SAMPLER_DIM_MS ||
3185 sampler_dim == GLSL_SAMPLER_DIM_BUF);
3186 texop = nir_texop_txs;
3187 dest_type = nir_type_int32;
3188 break;
3189
3190 case SpvOpImageQueryLod:
3191 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3192 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3193 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3194 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3195 texop = nir_texop_lod;
3196 dest_type = nir_type_float32;
3197 break;
3198
3199 case SpvOpImageQueryLevels:
3200 /* This operation is not valid for an MS image but is present in some old
3201 * shaders. Just return 1 in those cases.
3202 */
3203 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
3204 vtn_warn("OpImageQueryLevels 'Sampled Image' should have an MS of 0, "
3205 "but found MS of 1. Replacing query with constant value 1.");
3206 vtn_push_nir_ssa(b, w[2], nir_imm_int(&b->nb, 1));
3207 return;
3208 }
3209 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_1D ||
3210 sampler_dim == GLSL_SAMPLER_DIM_2D ||
3211 sampler_dim == GLSL_SAMPLER_DIM_3D ||
3212 sampler_dim == GLSL_SAMPLER_DIM_CUBE);
3213 texop = nir_texop_query_levels;
3214 dest_type = nir_type_int32;
3215 break;
3216
3217 case SpvOpImageQuerySamples:
3218 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS);
3219 texop = nir_texop_texture_samples;
3220 dest_type = nir_type_int32;
3221 break;
3222
3223 case SpvOpFragmentFetchAMD:
3224 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
3225 sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3226 texop = nir_texop_fragment_fetch_amd;
3227 break;
3228
3229 case SpvOpFragmentMaskFetchAMD:
3230 vtn_assert(sampler_dim == GLSL_SAMPLER_DIM_MS ||
3231 sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3232 texop = nir_texop_fragment_mask_fetch_amd;
3233 dest_type = nir_type_uint32;
3234 break;
3235
3236 default:
3237 vtn_fail_with_opcode("Unhandled opcode", opcode);
3238 }
3239
3240 nir_tex_src srcs[10]; /* 10 should be enough */
3241 nir_tex_src *p = srcs;
3242
3243 p->src = nir_src_for_ssa(&image->def);
3244 p->src_type = nir_tex_src_texture_deref;
3245 p++;
3246
3247 switch (texop) {
3248 case nir_texop_tex:
3249 case nir_texop_txb:
3250 case nir_texop_txl:
3251 case nir_texop_txd:
3252 case nir_texop_tg4:
3253 case nir_texop_lod:
3254 vtn_fail_if(sampler == NULL,
3255 "%s requires an image of type OpTypeSampledImage",
3256 spirv_op_to_string(opcode));
3257 p->src = nir_src_for_ssa(&sampler->def);
3258 p->src_type = nir_tex_src_sampler_deref;
3259 p++;
3260 break;
3261 case nir_texop_txf:
3262 case nir_texop_txf_ms:
3263 case nir_texop_txs:
3264 case nir_texop_query_levels:
3265 case nir_texop_texture_samples:
3266 case nir_texop_samples_identical:
3267 case nir_texop_fragment_fetch_amd:
3268 case nir_texop_fragment_mask_fetch_amd:
3269 /* These don't take a sampler */
3270 break;
3271 case nir_texop_txf_ms_fb:
3272 vtn_fail("unexpected nir_texop_txf_ms_fb");
3273 break;
3274 case nir_texop_txf_ms_mcs_intel:
3275 vtn_fail("unexpected nir_texop_txf_ms_mcs");
3276 break;
3277 case nir_texop_tex_prefetch:
3278 vtn_fail("unexpected nir_texop_tex_prefetch");
3279 break;
3280 case nir_texop_descriptor_amd:
3281 case nir_texop_sampler_descriptor_amd:
3282 vtn_fail("unexpected nir_texop_*descriptor_amd");
3283 break;
3284 case nir_texop_lod_bias_agx:
3285 case nir_texop_custom_border_color_agx:
3286 case nir_texop_has_custom_border_color_agx:
3287 vtn_fail("unexpected nir_texop_*_agx");
3288 break;
3289 case nir_texop_hdr_dim_nv:
3290 case nir_texop_tex_type_nv:
3291 vtn_fail("unexpected nir_texop_*_nv");
3292 break;
3293 }
3294
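/* w[1] is the result type, w[2] the result id, and w[3] the (sampled) image;
 * the remaining operands start at w[4].
 */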
3295 unsigned idx = 4;
3296
3297 struct nir_def *coord;
3298 unsigned coord_components;
3299 switch (opcode) {
3300 case SpvOpImageSampleImplicitLod:
3301 case SpvOpImageSparseSampleImplicitLod:
3302 case SpvOpImageSampleExplicitLod:
3303 case SpvOpImageSparseSampleExplicitLod:
3304 case SpvOpImageSampleDrefImplicitLod:
3305 case SpvOpImageSparseSampleDrefImplicitLod:
3306 case SpvOpImageSampleDrefExplicitLod:
3307 case SpvOpImageSparseSampleDrefExplicitLod:
3308 case SpvOpImageSampleProjImplicitLod:
3309 case SpvOpImageSampleProjExplicitLod:
3310 case SpvOpImageSampleProjDrefImplicitLod:
3311 case SpvOpImageSampleProjDrefExplicitLod:
3312 case SpvOpImageFetch:
3313 case SpvOpImageSparseFetch:
3314 case SpvOpImageGather:
3315 case SpvOpImageSparseGather:
3316 case SpvOpImageDrefGather:
3317 case SpvOpImageSparseDrefGather:
3318 case SpvOpImageQueryLod:
3319 case SpvOpFragmentFetchAMD:
3320 case SpvOpFragmentMaskFetchAMD: {
3321 /* All these types have the coordinate as their first real argument */
3322 coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
3323
3324 if (is_array && texop != nir_texop_lod)
3325 coord_components++;
3326
3327 struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
3328 coord = coord_val->def;
3329 /* From the SPIR-V spec version 1.5, rev. 5:
3330 *
3331 * "Coordinate must be a scalar or vector of floating-point type. It
3332 * contains (u[, v] ... [, array layer]) as needed by the definition
3333 * of Sampled Image. It may be a vector larger than needed, but all
3334 * unused components appear after all used components."
3335 */
3336 vtn_fail_if(coord->num_components < coord_components,
3337 "Coordinate value passed has fewer components than sampler dimensionality.");
3338 p->src = nir_src_for_ssa(nir_trim_vector(&b->nb, coord, coord_components));
3339
3340 /* OpenCL allows integer sampling coordinates */
3341 if (glsl_type_is_integer(coord_val->type) &&
3342 opcode == SpvOpImageSampleExplicitLod) {
3343 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
3344 "Unless the Kernel capability is being used, the coordinate parameter of "
3345 "OpImageSampleExplicitLod must be floating point.");
3346
3347 nir_def *coords[4];
3348 nir_def *f0_5 = nir_imm_float(&b->nb, 0.5);
3349 for (unsigned i = 0; i < coord_components; i++) {
3350 coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
3351
3352 if (!is_array || i != coord_components - 1)
3353 coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
3354 }
3355
3356 p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
3357 }
3358
3359 p->src_type = nir_tex_src_coord;
3360 p++;
3361 break;
3362 }
3363
3364 default:
3365 coord = NULL;
3366 coord_components = 0;
3367 break;
3368 }
3369
3370 switch (opcode) {
3371 case SpvOpImageSampleProjImplicitLod:
3372 case SpvOpImageSampleProjExplicitLod:
3373 case SpvOpImageSampleProjDrefImplicitLod:
3374 case SpvOpImageSampleProjDrefExplicitLod:
3375 /* These have the projector as the last coordinate component */
3376 p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
3377 p->src_type = nir_tex_src_projector;
3378 p++;
3379 break;
3380
3381 default:
3382 break;
3383 }
3384
3385 bool is_shadow = false;
3386 unsigned gather_component = 0;
3387 switch (opcode) {
3388 case SpvOpImageSampleDrefImplicitLod:
3389 case SpvOpImageSparseSampleDrefImplicitLod:
3390 case SpvOpImageSampleDrefExplicitLod:
3391 case SpvOpImageSparseSampleDrefExplicitLod:
3392 case SpvOpImageSampleProjDrefImplicitLod:
3393 case SpvOpImageSampleProjDrefExplicitLod:
3394 case SpvOpImageDrefGather:
3395 case SpvOpImageSparseDrefGather:
3396 /* These all have an explicit depth value as their next source */
3397 is_shadow = true;
3398 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
3399 break;
3400
3401 case SpvOpImageGather:
3402 case SpvOpImageSparseGather:
3403 /* This has a component as its next source */
3404 gather_component = vtn_constant_uint(b, w[idx++]);
3405 break;
3406
3407 default:
3408 break;
3409 }
3410
3411 bool is_sparse = false;
3412 switch (opcode) {
3413 case SpvOpImageSparseSampleImplicitLod:
3414 case SpvOpImageSparseSampleExplicitLod:
3415 case SpvOpImageSparseSampleDrefImplicitLod:
3416 case SpvOpImageSparseSampleDrefExplicitLod:
3417 case SpvOpImageSparseFetch:
3418 case SpvOpImageSparseGather:
3419 case SpvOpImageSparseDrefGather:
3420 is_sparse = true;
3421 break;
3422 default:
3423 break;
3424 }
3425
3426 /* For OpImageQuerySizeLod, we always have an LOD */
3427 if (opcode == SpvOpImageQuerySizeLod)
3428 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
3429
3430 /* For OpFragmentFetchAMD, we always have a multisample index */
3431 if (opcode == SpvOpFragmentFetchAMD)
3432 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
3433
3434 /* Now we need to handle some number of optional arguments */
3435 struct vtn_value *gather_offsets = NULL;
3436 uint32_t operands = SpvImageOperandsMaskNone;
3437 if (idx < count) {
3438 operands = w[idx];
3439
3440 if (operands & SpvImageOperandsBiasMask) {
3441 vtn_assert(texop == nir_texop_tex ||
3442 texop == nir_texop_tg4);
3443 if (texop == nir_texop_tex)
3444 texop = nir_texop_txb;
3445 uint32_t arg = image_operand_arg(b, w, count, idx,
3446 SpvImageOperandsBiasMask);
3447 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
3448 }
3449
3450 if (operands & SpvImageOperandsLodMask) {
3451 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
3452 texop == nir_texop_txs || texop == nir_texop_tg4);
3453 uint32_t arg = image_operand_arg(b, w, count, idx,
3454 SpvImageOperandsLodMask);
3455 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
3456 }
3457
3458 if (operands & SpvImageOperandsGradMask) {
3459 vtn_assert(texop == nir_texop_txl);
3460 texop = nir_texop_txd;
3461 uint32_t arg = image_operand_arg(b, w, count, idx,
3462 SpvImageOperandsGradMask);
3463 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
3464 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
3465 }
3466
3467 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
3468 SpvImageOperandsOffsetMask |
3469 SpvImageOperandsConstOffsetMask)) > 1,
3470 "At most one of the ConstOffset, Offset, and ConstOffsets "
3471 "image operands can be used on a given instruction.");
3472
3473 if (operands & SpvImageOperandsOffsetMask) {
3474 uint32_t arg = image_operand_arg(b, w, count, idx,
3475 SpvImageOperandsOffsetMask);
3476 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3477 }
3478
3479 if (operands & SpvImageOperandsConstOffsetMask) {
3480 uint32_t arg = image_operand_arg(b, w, count, idx,
3481 SpvImageOperandsConstOffsetMask);
3482 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3483 }
3484
3485 if (operands & SpvImageOperandsConstOffsetsMask) {
3486 vtn_assert(texop == nir_texop_tg4);
3487 uint32_t arg = image_operand_arg(b, w, count, idx,
3488 SpvImageOperandsConstOffsetsMask);
3489 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
3490 }
3491
3492 if (operands & SpvImageOperandsSampleMask) {
3493 vtn_assert(texop == nir_texop_txf_ms);
3494 uint32_t arg = image_operand_arg(b, w, count, idx,
3495 SpvImageOperandsSampleMask);
3496 texop = nir_texop_txf_ms;
3497 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
3498 }
3499
3500 if (operands & SpvImageOperandsMinLodMask) {
3501 vtn_assert(texop == nir_texop_tex ||
3502 texop == nir_texop_txb ||
3503 texop == nir_texop_txd);
3504 uint32_t arg = image_operand_arg(b, w, count, idx,
3505 SpvImageOperandsMinLodMask);
3506 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
3507 }
3508 }
3509
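/* For sparse image instructions the SPIR-V result type is a struct of
 * (residency code, texel); peel off the texel member here and reassemble
 * the struct once the texture instruction has been emitted.
 */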
3510 struct vtn_type *ret_type = vtn_get_type(b, w[1]);
3511 struct vtn_type *struct_type = NULL;
3512 if (is_sparse) {
3513 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
3514 struct_type = ret_type;
3515 ret_type = struct_type->members[1];
3516 }
3517
3518 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3519 instr->op = texop;
3520
3521 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3522
3523 instr->coord_components = coord_components;
3524 instr->sampler_dim = sampler_dim;
3525 instr->is_array = is_array;
3526 instr->is_shadow = is_shadow;
3527 instr->is_sparse = is_sparse;
3528 instr->is_new_style_shadow =
3529 is_shadow && glsl_get_components(ret_type->type) == 1;
3530 instr->component = gather_component;
3531
3532 /* If SpvCapabilityImageGatherBiasLodAMD is enabled, texture gather without an explicit LOD
3533 * has an implicit one (instead of using level 0).
3534 */
3535 if (texop == nir_texop_tg4 &&
3536 b->enabled_capabilities.ImageGatherBiasLodAMD &&
3537 !(operands & SpvImageOperandsLodMask)) {
3538 instr->is_gather_implicit_lod = true;
3539 }
3540
3541 /* The Vulkan spec says:
3542 *
3543 * "If an instruction loads from or stores to a resource (including
3544 * atomics and image instructions) and the resource descriptor being
3545 * accessed is not dynamically uniform, then the operand corresponding
3546 * to that resource (e.g. the pointer or sampled image operand) must be
3547 * decorated with NonUniform."
3548 *
3549 * It's very careful to specify that the exact operand must be decorated
3550 * NonUniform. The SPIR-V parser is not expected to chase through long
3551 * chains to find the NonUniform decoration. It's either right there or we
3552 * can assume it doesn't exist.
3553 */
3554 enum gl_access_qualifier access = 0;
3555 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3556
3557 if (operands & SpvImageOperandsNontemporalMask)
3558 access |= ACCESS_NON_TEMPORAL;
3559
3560 if (sampler && b->options->force_tex_non_uniform)
3561 access |= ACCESS_NON_UNIFORM;
3562
3563 if (sampled_val->propagated_non_uniform)
3564 access |= ACCESS_NON_UNIFORM;
3565
3566 if (image && (access & ACCESS_NON_UNIFORM))
3567 instr->texture_non_uniform = true;
3568
3569 if (sampler && (access & ACCESS_NON_UNIFORM))
3570 instr->sampler_non_uniform = true;
3571
3572 /* for non-query ops, get dest_type from SPIR-V return type */
3573 if (dest_type == nir_type_invalid) {
3574 /* The return type should match the image type, unless the image type is
3575 * VOID (CL image), in which case the return type dictates the dest type.
3576 */
3577 enum glsl_base_type sampler_base =
3578 glsl_get_sampler_result_type(image->type);
3579 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3580 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3581 "SPIR-V return type mismatches image type. This is only valid "
3582 "for untyped images (OpenCL).");
3583 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3584 dest_type = get_image_type(b, dest_type, operands);
3585 }
3586
3587 instr->dest_type = dest_type;
3588
3589 nir_def_init(&instr->instr, &instr->def,
3590 nir_tex_instr_dest_size(instr), 32);
3591
3592 vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3593 nir_tex_instr_result_size(instr));
3594
3595 if (gather_offsets) {
3596 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3597 gather_offsets->type->length != 4,
3598 "ConstOffsets must be an array of size four of vectors "
3599 "of two integer components");
3600
3601 struct vtn_type *vec_type = gather_offsets->type->array_element;
3602 vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3603 vec_type->length != 2 ||
3604 !glsl_type_is_integer(vec_type->type),
3605 "ConstOffsets must be an array of size four of vectors "
3606 "of two integer components");
3607
3608 unsigned bit_size = glsl_get_bit_size(vec_type->type);
3609 for (uint32_t i = 0; i < 4; i++) {
3610 const nir_const_value *cvec =
3611 gather_offsets->constant->elements[i]->values;
3612 for (uint32_t j = 0; j < 2; j++) {
3613 switch (bit_size) {
3614 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break;
3615 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break;
3616 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break;
3617 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break;
3618 default:
3619 vtn_fail("Unsupported bit size: %u", bit_size);
3620 }
3621 }
3622 }
3623 }
3624
3625 nir_builder_instr_insert(&b->nb, &instr->instr);
3626
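/* NIR returns the sparse residency code as an extra component appended to
 * the texel; split it back out into the two members of the SPIR-V result
 * struct.
 */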
3627 if (is_sparse) {
3628 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3629 unsigned result_size = glsl_get_vector_elements(ret_type->type);
3630 dest->elems[0]->def = nir_channel(&b->nb, &instr->def, result_size);
3631 dest->elems[1]->def = nir_trim_vector(&b->nb, &instr->def,
3632 result_size);
3633 vtn_push_ssa_value(b, w[2], dest);
3634 } else {
3635 vtn_push_nir_ssa(b, w[2], &instr->def);
3636 }
3637 }
3638
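/* Map a SPIR-V atomic opcode onto the NIR atomic op it lowers to. Several
 * opcodes share a NIR op (IIncrement, IDecrement and ISub all become iadd,
 * AtomicFlagTestAndSet becomes cmpxchg); the distinguishing data operands
 * are supplied by fill_common_atomic_sources() or by the caller.
 */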
3639 static nir_atomic_op
3640 translate_atomic_op(SpvOp opcode)
3641 {
3642 switch (opcode) {
3643 case SpvOpAtomicExchange: return nir_atomic_op_xchg;
3644 case SpvOpAtomicCompareExchange: return nir_atomic_op_cmpxchg;
3645 case SpvOpAtomicCompareExchangeWeak: return nir_atomic_op_cmpxchg;
3646 case SpvOpAtomicIIncrement: return nir_atomic_op_iadd;
3647 case SpvOpAtomicIDecrement: return nir_atomic_op_iadd;
3648 case SpvOpAtomicIAdd: return nir_atomic_op_iadd;
3649 case SpvOpAtomicISub: return nir_atomic_op_iadd;
3650 case SpvOpAtomicSMin: return nir_atomic_op_imin;
3651 case SpvOpAtomicUMin: return nir_atomic_op_umin;
3652 case SpvOpAtomicSMax: return nir_atomic_op_imax;
3653 case SpvOpAtomicUMax: return nir_atomic_op_umax;
3654 case SpvOpAtomicAnd: return nir_atomic_op_iand;
3655 case SpvOpAtomicOr: return nir_atomic_op_ior;
3656 case SpvOpAtomicXor: return nir_atomic_op_ixor;
3657 case SpvOpAtomicFAddEXT: return nir_atomic_op_fadd;
3658 case SpvOpAtomicFMinEXT: return nir_atomic_op_fmin;
3659 case SpvOpAtomicFMaxEXT: return nir_atomic_op_fmax;
3660 case SpvOpAtomicFlagTestAndSet: return nir_atomic_op_cmpxchg;
3661 default:
3662 unreachable("Invalid atomic");
3663 }
3664 }
3665
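/* Fill in the data source(s) of an atomic: an implicit +1/-1 for
 * increment/decrement, the negated operand for ISub, the comparator and then
 * the value (swapped relative to the SPIR-V operand order) for
 * compare-exchange, and the plain value operand for everything else. w[1]
 * (the result type) provides the bit size.
 */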
3666 static void
3667 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3668 const uint32_t *w, nir_src *src)
3669 {
3670 const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3671 unsigned bit_size = glsl_get_bit_size(type);
3672
3673 switch (opcode) {
3674 case SpvOpAtomicIIncrement:
3675 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3676 break;
3677
3678 case SpvOpAtomicIDecrement:
3679 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3680 break;
3681
3682 case SpvOpAtomicISub:
3683 src[0] =
3684 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3685 break;
3686
3687 case SpvOpAtomicCompareExchange:
3688 case SpvOpAtomicCompareExchangeWeak:
3689 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3690 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3691 break;
3692
3693 case SpvOpAtomicExchange:
3694 case SpvOpAtomicIAdd:
3695 case SpvOpAtomicSMin:
3696 case SpvOpAtomicUMin:
3697 case SpvOpAtomicSMax:
3698 case SpvOpAtomicUMax:
3699 case SpvOpAtomicAnd:
3700 case SpvOpAtomicOr:
3701 case SpvOpAtomicXor:
3702 case SpvOpAtomicFAddEXT:
3703 case SpvOpAtomicFMinEXT:
3704 case SpvOpAtomicFMaxEXT:
3705 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3706 break;
3707
3708 default:
3709 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3710 }
3711 }
3712
3713 static nir_def *
3714 get_image_coord(struct vtn_builder *b, uint32_t value)
3715 {
3716 nir_def *coord = vtn_get_nir_ssa(b, value);
3717 /* The image_load_store intrinsics assume a 4-dim coordinate */
3718 return nir_pad_vec4(&b->nb, coord);
3719 }
3720
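/* Handle OpImageTexelPointer plus loads, stores, queries and atomics on
 * storage images, translating them into nir image_deref_* intrinsics and
 * emitting any memory barriers implied by the SPIR-V memory semantics.
 */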
3721 static void
3722 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3723 const uint32_t *w, unsigned count)
3724 {
3725 /* Just get this one out of the way */
3726 if (opcode == SpvOpImageTexelPointer) {
3727 struct vtn_value *val =
3728 vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3729 val->image = vtn_alloc(b, struct vtn_image_pointer);
3730
3731 val->image->image = vtn_nir_deref(b, w[3]);
3732 val->image->coord = get_image_coord(b, w[4]);
3733 val->image->sample = vtn_get_nir_ssa(b, w[5]);
3734 val->image->lod = nir_imm_int(&b->nb, 0);
3735 return;
3736 }
3737
3738 struct vtn_image_pointer image;
3739 SpvScope scope = SpvScopeInvocation;
3740 SpvMemorySemanticsMask semantics = 0;
3741 SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3742
3743 enum gl_access_qualifier access = 0;
3744
3745 struct vtn_value *res_val;
3746 switch (opcode) {
3747 case SpvOpAtomicExchange:
3748 case SpvOpAtomicCompareExchange:
3749 case SpvOpAtomicCompareExchangeWeak:
3750 case SpvOpAtomicIIncrement:
3751 case SpvOpAtomicIDecrement:
3752 case SpvOpAtomicIAdd:
3753 case SpvOpAtomicISub:
3754 case SpvOpAtomicLoad:
3755 case SpvOpAtomicSMin:
3756 case SpvOpAtomicUMin:
3757 case SpvOpAtomicSMax:
3758 case SpvOpAtomicUMax:
3759 case SpvOpAtomicAnd:
3760 case SpvOpAtomicOr:
3761 case SpvOpAtomicXor:
3762 case SpvOpAtomicFAddEXT:
3763 case SpvOpAtomicFMinEXT:
3764 case SpvOpAtomicFMaxEXT:
3765 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3766 image = *res_val->image;
3767 scope = vtn_constant_uint(b, w[4]);
3768 semantics = vtn_constant_uint(b, w[5]);
3769 access |= ACCESS_COHERENT;
3770 break;
3771
3772 case SpvOpAtomicStore:
3773 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3774 image = *res_val->image;
3775 scope = vtn_constant_uint(b, w[2]);
3776 semantics = vtn_constant_uint(b, w[3]);
3777 access |= ACCESS_COHERENT;
3778 break;
3779
3780 case SpvOpImageQuerySizeLod:
3781 res_val = vtn_untyped_value(b, w[3]);
3782 image.image = vtn_get_image(b, w[3], &access);
3783 image.coord = NULL;
3784 image.sample = NULL;
3785 image.lod = vtn_ssa_value(b, w[4])->def;
3786 break;
3787
3788 case SpvOpImageQueryFormat:
3789 case SpvOpImageQueryLevels:
3790 case SpvOpImageQueryOrder:
3791 case SpvOpImageQuerySamples:
3792 case SpvOpImageQuerySize:
3793 res_val = vtn_untyped_value(b, w[3]);
3794 image.image = vtn_get_image(b, w[3], &access);
3795 image.coord = NULL;
3796 image.sample = NULL;
3797 image.lod = NULL;
3798 break;
3799
3800 case SpvOpImageRead:
3801 case SpvOpImageSparseRead: {
3802 res_val = vtn_untyped_value(b, w[3]);
3803 image.image = vtn_get_image(b, w[3], &access);
3804 image.coord = get_image_coord(b, w[4]);
3805
3806 operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3807
3808 if (operands & SpvImageOperandsSampleMask) {
3809 uint32_t arg = image_operand_arg(b, w, count, 5,
3810 SpvImageOperandsSampleMask);
3811 image.sample = vtn_get_nir_ssa(b, w[arg]);
3812 } else {
3813 image.sample = nir_undef(&b->nb, 1, 32);
3814 }
3815
3816 if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3817 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3818 "MakeTexelVisible requires NonPrivateTexel to also be set.");
3819 uint32_t arg = image_operand_arg(b, w, count, 5,
3820 SpvImageOperandsMakeTexelVisibleMask);
3821 semantics = SpvMemorySemanticsMakeVisibleMask;
3822 scope = vtn_constant_uint(b, w[arg]);
3823 }
3824
3825 if (operands & SpvImageOperandsLodMask) {
3826 uint32_t arg = image_operand_arg(b, w, count, 5,
3827 SpvImageOperandsLodMask);
3828 image.lod = vtn_get_nir_ssa(b, w[arg]);
3829 } else {
3830 image.lod = nir_imm_int(&b->nb, 0);
3831 }
3832
3833 if (operands & SpvImageOperandsVolatileTexelMask)
3834 access |= ACCESS_VOLATILE;
3835 if (operands & SpvImageOperandsNontemporalMask)
3836 access |= ACCESS_NON_TEMPORAL;
3837
3838 break;
3839 }
3840
3841 case SpvOpImageWrite: {
3842 res_val = vtn_untyped_value(b, w[1]);
3843 image.image = vtn_get_image(b, w[1], &access);
3844 image.coord = get_image_coord(b, w[2]);
3845
3846 /* texel = w[3] */
3847
3848 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3849
3850 if (operands & SpvImageOperandsSampleMask) {
3851 uint32_t arg = image_operand_arg(b, w, count, 4,
3852 SpvImageOperandsSampleMask);
3853 image.sample = vtn_get_nir_ssa(b, w[arg]);
3854 } else {
3855 image.sample = nir_undef(&b->nb, 1, 32);
3856 }
3857
3858 if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3859 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3860 "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3861 uint32_t arg = image_operand_arg(b, w, count, 4,
3862 SpvImageOperandsMakeTexelAvailableMask);
3863 semantics = SpvMemorySemanticsMakeAvailableMask;
3864 scope = vtn_constant_uint(b, w[arg]);
3865 }
3866
3867 if (operands & SpvImageOperandsLodMask) {
3868 uint32_t arg = image_operand_arg(b, w, count, 4,
3869 SpvImageOperandsLodMask);
3870 image.lod = vtn_get_nir_ssa(b, w[arg]);
3871 } else {
3872 image.lod = nir_imm_int(&b->nb, 0);
3873 }
3874
3875 if (operands & SpvImageOperandsVolatileTexelMask)
3876 access |= ACCESS_VOLATILE;
3877 if (operands & SpvImageOperandsNontemporalMask)
3878 access |= ACCESS_NON_TEMPORAL;
3879
3880 break;
3881 }
3882
3883 default:
3884 vtn_fail_with_opcode("Invalid image opcode", opcode);
3885 }
3886
3887 if (semantics & SpvMemorySemanticsVolatileMask)
3888 access |= ACCESS_VOLATILE;
3889
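/* Translate the SPIR-V opcode into the matching image_deref intrinsic. */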
3890 nir_intrinsic_op op;
3891 switch (opcode) {
3892 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3893 OP(ImageQuerySize, size)
3894 OP(ImageQuerySizeLod, size)
3895 OP(ImageRead, load)
3896 OP(ImageSparseRead, sparse_load)
3897 OP(ImageWrite, store)
3898 OP(AtomicLoad, load)
3899 OP(AtomicStore, store)
3900 OP(AtomicExchange, atomic)
3901 OP(AtomicCompareExchange, atomic_swap)
3902 OP(AtomicCompareExchangeWeak, atomic_swap)
3903 OP(AtomicIIncrement, atomic)
3904 OP(AtomicIDecrement, atomic)
3905 OP(AtomicIAdd, atomic)
3906 OP(AtomicISub, atomic)
3907 OP(AtomicSMin, atomic)
3908 OP(AtomicUMin, atomic)
3909 OP(AtomicSMax, atomic)
3910 OP(AtomicUMax, atomic)
3911 OP(AtomicAnd, atomic)
3912 OP(AtomicOr, atomic)
3913 OP(AtomicXor, atomic)
3914 OP(AtomicFAddEXT, atomic)
3915 OP(AtomicFMinEXT, atomic)
3916 OP(AtomicFMaxEXT, atomic)
3917 OP(ImageQueryFormat, format)
3918 OP(ImageQueryLevels, levels)
3919 OP(ImageQueryOrder, order)
3920 OP(ImageQuerySamples, samples)
3921 #undef OP
3922 default:
3923 vtn_fail_with_opcode("Invalid image opcode", opcode);
3924 }
3925
3926 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3927 if (nir_intrinsic_has_atomic_op(intrin))
3928 nir_intrinsic_set_atomic_op(intrin, translate_atomic_op(opcode));
3929
3930 intrin->src[0] = nir_src_for_ssa(&image.image->def);
3931 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3932 nir_intrinsic_set_image_array(intrin,
3933 glsl_sampler_type_is_array(image.image->type));
3934
3935 switch (opcode) {
3936 case SpvOpImageQueryLevels:
3937 case SpvOpImageQuerySamples:
3938 case SpvOpImageQuerySize:
3939 case SpvOpImageQuerySizeLod:
3940 case SpvOpImageQueryFormat:
3941 case SpvOpImageQueryOrder:
3942 break;
3943 default:
3944 /* The image coordinate is always 4 components but we may not have that
3945 * many. Swizzle to compensate.
3946 */
3947 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3948 intrin->src[2] = nir_src_for_ssa(image.sample);
3949 break;
3950 }
3951
3952 /* The Vulkan spec says:
3953 *
3954 * "If an instruction loads from or stores to a resource (including
3955 * atomics and image instructions) and the resource descriptor being
3956 * accessed is not dynamically uniform, then the operand corresponding
3957 * to that resource (e.g. the pointer or sampled image operand) must be
3958 * decorated with NonUniform."
3959 *
3960 * It's very careful to specify that the exact operand must be decorated
3961 * NonUniform. The SPIR-V parser is not expected to chase through long
3962 * chains to find the NonUniform decoration. It's either right there or we
3963 * can assume it doesn't exist.
3964 */
3965 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3966 nir_intrinsic_set_access(intrin, access);
3967
3968 switch (opcode) {
3969 case SpvOpImageQueryLevels:
3970 case SpvOpImageQuerySamples:
3971 case SpvOpImageQueryFormat:
3972 case SpvOpImageQueryOrder:
3973 /* No additional sources */
3974 break;
3975 case SpvOpImageQuerySize:
3976 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3977 break;
3978 case SpvOpImageQuerySizeLod:
3979 intrin->src[1] = nir_src_for_ssa(image.lod);
3980 break;
3981 case SpvOpAtomicLoad:
3982 case SpvOpImageRead:
3983 case SpvOpImageSparseRead:
3984 /* Only OpImageRead can take a LOD parameter (via
3985 * SPV_AMD_shader_image_load_store_lod), but the current NIR
3986 * intrinsics definition for atomics requires us to set it for
3987 * OpAtomicLoad as well.
3988 */
3989 intrin->src[3] = nir_src_for_ssa(image.lod);
3990 break;
3991 case SpvOpAtomicStore:
3992 case SpvOpImageWrite: {
3993 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
3994 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
3995 /* nir_intrinsic_image_deref_store always takes a vec4 value */
3996 assert(op == nir_intrinsic_image_deref_store);
3997 intrin->num_components = 4;
3998 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
3999 /* Only OpImageWrite can take a LOD parameter (via
4000 * SPV_AMD_shader_image_load_store_lod), but the current NIR
4001 * intrinsics definition for atomics requires us to set it for
4002 * OpAtomicStore as well.
4003 */
4004 intrin->src[4] = nir_src_for_ssa(image.lod);
4005
4006 nir_alu_type src_type =
4007 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
4008 nir_intrinsic_set_src_type(intrin, src_type);
4009 break;
4010 }
4011
4012 case SpvOpAtomicCompareExchange:
4013 case SpvOpAtomicCompareExchangeWeak:
4014 case SpvOpAtomicIIncrement:
4015 case SpvOpAtomicIDecrement:
4016 case SpvOpAtomicExchange:
4017 case SpvOpAtomicIAdd:
4018 case SpvOpAtomicISub:
4019 case SpvOpAtomicSMin:
4020 case SpvOpAtomicUMin:
4021 case SpvOpAtomicSMax:
4022 case SpvOpAtomicUMax:
4023 case SpvOpAtomicAnd:
4024 case SpvOpAtomicOr:
4025 case SpvOpAtomicXor:
4026 case SpvOpAtomicFAddEXT:
4027 case SpvOpAtomicFMinEXT:
4028 case SpvOpAtomicFMaxEXT:
4029 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
4030 break;
4031
4032 default:
4033 vtn_fail_with_opcode("Invalid image opcode", opcode);
4034 }
4035
4036 /* Image operations implicitly have the Image storage memory semantics. */
4037 semantics |= SpvMemorySemanticsImageMemoryMask;
4038
4039 SpvMemorySemanticsMask before_semantics;
4040 SpvMemorySemanticsMask after_semantics;
4041 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4042
4043 if (before_semantics)
4044 vtn_emit_memory_barrier(b, scope, before_semantics);
4045
4046 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
4047 struct vtn_type *type = vtn_get_type(b, w[1]);
4048 struct vtn_type *struct_type = NULL;
4049 if (opcode == SpvOpImageSparseRead) {
4050 vtn_assert(glsl_type_is_struct_or_ifc(type->type));
4051 struct_type = type;
4052 type = struct_type->members[1];
4053 }
4054
4055 unsigned dest_components = glsl_get_vector_elements(type->type);
4056 if (opcode == SpvOpImageSparseRead)
4057 dest_components++;
4058
4059 if (nir_intrinsic_infos[op].dest_components == 0)
4060 intrin->num_components = dest_components;
4061
4062 unsigned bit_size = glsl_get_bit_size(type->type);
4063 if (opcode == SpvOpImageQuerySize ||
4064 opcode == SpvOpImageQuerySizeLod)
4065 bit_size = MIN2(bit_size, 32);
4066
4067 nir_def_init(&intrin->instr, &intrin->def,
4068 nir_intrinsic_dest_components(intrin), bit_size);
4069
4070 nir_builder_instr_insert(&b->nb, &intrin->instr);
4071
4072 nir_def *result = nir_trim_vector(&b->nb, &intrin->def,
4073 dest_components);
4074
4075 if (opcode == SpvOpImageQuerySize ||
4076 opcode == SpvOpImageQuerySizeLod)
4077 result = nir_u2uN(&b->nb, result, glsl_get_bit_size(type->type));
4078
4079 if (opcode == SpvOpImageSparseRead) {
4080 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
4081 unsigned res_type_size = glsl_get_vector_elements(type->type);
4082 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
4083 if (intrin->def.bit_size != 32)
4084 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
4085 dest->elems[1]->def = nir_trim_vector(&b->nb, result, res_type_size);
4086 vtn_push_ssa_value(b, w[2], dest);
4087 } else {
4088 vtn_push_nir_ssa(b, w[2], result);
4089 }
4090
4091 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead ||
4092 opcode == SpvOpAtomicLoad) {
4093 nir_alu_type dest_type =
4094 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
4095 nir_intrinsic_set_dest_type(intrin, dest_type);
4096 }
4097 } else {
4098 nir_builder_instr_insert(&b->nb, &intrin->instr);
4099 }
4100
4101 if (after_semantics)
4102 vtn_emit_memory_barrier(b, scope, after_semantics);
4103 }
4104
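/* Atomics on GL atomic counter uniforms map onto the dedicated
 * atomic_counter_* intrinsics rather than the generic deref atomics.
 */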
4105 static nir_intrinsic_op
4106 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
4107 {
4108 switch (opcode) {
4109 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
4110 OP(AtomicLoad, read_deref)
4111 OP(AtomicExchange, exchange)
4112 OP(AtomicCompareExchange, comp_swap)
4113 OP(AtomicCompareExchangeWeak, comp_swap)
4114 OP(AtomicIIncrement, inc_deref)
4115 OP(AtomicIDecrement, post_dec_deref)
4116 OP(AtomicIAdd, add_deref)
4117 OP(AtomicISub, add_deref)
4118 OP(AtomicUMin, min_deref)
4119 OP(AtomicUMax, max_deref)
4120 OP(AtomicAnd, and_deref)
4121 OP(AtomicOr, or_deref)
4122 OP(AtomicXor, xor_deref)
4123 #undef OP
4124 default:
4125 /* We left the following out: AtomicStore, AtomicSMin and
4126 * AtomicSMax. Right now there are no NIR intrinsics for them. Atomic
4127 * counter support is currently only needed for ARB_gl_spirv, so we
4128 * only need to support GLSL atomic counters, which are uints and don't
4129 * allow direct storage.
4130 */
4131 vtn_fail("Invalid uniform atomic");
4132 }
4133 }
4134
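/* Atomics on regular derefs (workgroup, SSBO, etc.) map onto the generic
 * deref load/store/atomic intrinsics.
 */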
4135 static nir_intrinsic_op
4136 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
4137 {
4138 switch (opcode) {
4139 case SpvOpAtomicLoad: return nir_intrinsic_load_deref;
4140 case SpvOpAtomicFlagClear:
4141 case SpvOpAtomicStore: return nir_intrinsic_store_deref;
4142 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
4143 OP(AtomicExchange, atomic)
4144 OP(AtomicCompareExchange, atomic_swap)
4145 OP(AtomicCompareExchangeWeak, atomic_swap)
4146 OP(AtomicIIncrement, atomic)
4147 OP(AtomicIDecrement, atomic)
4148 OP(AtomicIAdd, atomic)
4149 OP(AtomicISub, atomic)
4150 OP(AtomicSMin, atomic)
4151 OP(AtomicUMin, atomic)
4152 OP(AtomicSMax, atomic)
4153 OP(AtomicUMax, atomic)
4154 OP(AtomicAnd, atomic)
4155 OP(AtomicOr, atomic)
4156 OP(AtomicXor, atomic)
4157 OP(AtomicFAddEXT, atomic)
4158 OP(AtomicFMinEXT, atomic)
4159 OP(AtomicFMaxEXT, atomic)
4160 OP(AtomicFlagTestAndSet, atomic_swap)
4161 #undef OP
4162 default:
4163 vtn_fail_with_opcode("Invalid shared atomic", opcode);
4164 }
4165 }
4166
4167 /*
4168 * Handles shared atomics, ssbo atomics and atomic counters.
4169 */
4170 static void
4171 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
4172 const uint32_t *w, UNUSED unsigned count)
4173 {
4174 struct vtn_pointer *ptr;
4175 nir_intrinsic_instr *atomic;
4176
4177 SpvScope scope = SpvScopeInvocation;
4178 SpvMemorySemanticsMask semantics = 0;
4179 enum gl_access_qualifier access = 0;
4180
4181 switch (opcode) {
4182 case SpvOpAtomicLoad:
4183 case SpvOpAtomicExchange:
4184 case SpvOpAtomicCompareExchange:
4185 case SpvOpAtomicCompareExchangeWeak:
4186 case SpvOpAtomicIIncrement:
4187 case SpvOpAtomicIDecrement:
4188 case SpvOpAtomicIAdd:
4189 case SpvOpAtomicISub:
4190 case SpvOpAtomicSMin:
4191 case SpvOpAtomicUMin:
4192 case SpvOpAtomicSMax:
4193 case SpvOpAtomicUMax:
4194 case SpvOpAtomicAnd:
4195 case SpvOpAtomicOr:
4196 case SpvOpAtomicXor:
4197 case SpvOpAtomicFAddEXT:
4198 case SpvOpAtomicFMinEXT:
4199 case SpvOpAtomicFMaxEXT:
4200 case SpvOpAtomicFlagTestAndSet:
4201 ptr = vtn_pointer(b, w[3]);
4202 scope = vtn_constant_uint(b, w[4]);
4203 semantics = vtn_constant_uint(b, w[5]);
4204 break;
4205 case SpvOpAtomicFlagClear:
4206 case SpvOpAtomicStore:
4207 ptr = vtn_pointer(b, w[1]);
4208 scope = vtn_constant_uint(b, w[2]);
4209 semantics = vtn_constant_uint(b, w[3]);
4210 break;
4211
4212 default:
4213 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4214 }
4215
4216 if (semantics & SpvMemorySemanticsVolatileMask)
4217 access |= ACCESS_VOLATILE;
4218
4219 /* "uniform" here means an atomic counter uniform */
4220 if (ptr->mode == vtn_variable_mode_atomic_counter) {
4221 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4222 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
4223 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4224 atomic->src[0] = nir_src_for_ssa(&deref->def);
4225
4226 /* SSBO atomics would need their index/offset initialized here, but we
4227 * don't need to, as that info is already stored on the ptr->var->var
4228 * nir_variable (see vtn_create_variable).
4229 */
4230
4231 switch (opcode) {
4232 case SpvOpAtomicLoad:
4233 case SpvOpAtomicExchange:
4234 case SpvOpAtomicCompareExchange:
4235 case SpvOpAtomicCompareExchangeWeak:
4236 case SpvOpAtomicIIncrement:
4237 case SpvOpAtomicIDecrement:
4238 case SpvOpAtomicIAdd:
4239 case SpvOpAtomicISub:
4240 case SpvOpAtomicSMin:
4241 case SpvOpAtomicUMin:
4242 case SpvOpAtomicSMax:
4243 case SpvOpAtomicUMax:
4244 case SpvOpAtomicAnd:
4245 case SpvOpAtomicOr:
4246 case SpvOpAtomicXor:
4247 /* Nothing to do: we don't need to call fill_common_atomic_sources here,
4248 * as atomic counter uniforms don't have data sources.
4249 */
4250 break;
4251
4252 default:
4253 unreachable("Invalid SPIR-V atomic");
4254
4255 }
4256 } else {
4257 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
4258 const struct glsl_type *deref_type = deref->type;
4259 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
4260 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
4261 atomic->src[0] = nir_src_for_ssa(&deref->def);
4262
4263 if (nir_intrinsic_has_atomic_op(atomic))
4264 nir_intrinsic_set_atomic_op(atomic, translate_atomic_op(opcode));
4265
4266 if (ptr->mode != vtn_variable_mode_workgroup)
4267 access |= ACCESS_COHERENT;
4268
4269 nir_intrinsic_set_access(atomic, access);
4270
4271 switch (opcode) {
4272 case SpvOpAtomicLoad:
4273 atomic->num_components = glsl_get_vector_elements(deref_type);
4274 break;
4275
4276 case SpvOpAtomicStore:
4277 atomic->num_components = glsl_get_vector_elements(deref_type);
4278 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
4279 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
4280 break;
4281
4282 case SpvOpAtomicFlagClear:
4283 atomic->num_components = 1;
4284 nir_intrinsic_set_write_mask(atomic, 1);
4285 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4286 break;
4287 case SpvOpAtomicFlagTestAndSet:
4288 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
4289 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
4290 break;
4291 case SpvOpAtomicExchange:
4292 case SpvOpAtomicCompareExchange:
4293 case SpvOpAtomicCompareExchangeWeak:
4294 case SpvOpAtomicIIncrement:
4295 case SpvOpAtomicIDecrement:
4296 case SpvOpAtomicIAdd:
4297 case SpvOpAtomicISub:
4298 case SpvOpAtomicSMin:
4299 case SpvOpAtomicUMin:
4300 case SpvOpAtomicSMax:
4301 case SpvOpAtomicUMax:
4302 case SpvOpAtomicAnd:
4303 case SpvOpAtomicOr:
4304 case SpvOpAtomicXor:
4305 case SpvOpAtomicFAddEXT:
4306 case SpvOpAtomicFMinEXT:
4307 case SpvOpAtomicFMaxEXT:
4308 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
4309 break;
4310
4311 default:
4312 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
4313 }
4314 }
4315
4316 /* Atomic ordering operations will implicitly apply to the atomic operation
4317 * storage class, so include that too.
4318 */
4319 semantics |= vtn_mode_to_memory_semantics(ptr->mode);
4320
4321 SpvMemorySemanticsMask before_semantics;
4322 SpvMemorySemanticsMask after_semantics;
4323 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
4324
4325 if (before_semantics)
4326 vtn_emit_memory_barrier(b, scope, before_semantics);
4327
4328 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
4329 struct vtn_type *type = vtn_get_type(b, w[1]);
4330
4331 if (opcode == SpvOpAtomicFlagTestAndSet) {
4332 /* Map the atomic flag onto a 32-bit atomic integer. */
4333 nir_def_init(&atomic->instr, &atomic->def, 1, 32);
4334 } else {
4335 nir_def_init(&atomic->instr, &atomic->def,
4336 glsl_get_vector_elements(type->type),
4337 glsl_get_bit_size(type->type));
4338
4339 vtn_push_nir_ssa(b, w[2], &atomic->def);
4340 }
4341 }
4342
4343 nir_builder_instr_insert(&b->nb, &atomic->instr);
4344
4345 if (opcode == SpvOpAtomicFlagTestAndSet) {
4346 vtn_push_nir_ssa(b, w[2], nir_i2b(&b->nb, &atomic->def));
4347 }
4348 if (after_semantics)
4349 vtn_emit_memory_barrier(b, scope, after_semantics);
4350 }
4351
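/* Create (but do not insert) a nir vecN instruction; the caller fills in
 * the sources and inserts it.
 */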
4352 static nir_alu_instr *
4353 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
4354 {
4355 nir_op op = nir_op_vec(num_components);
4356 nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
4357 nir_def_init(&vec->instr, &vec->def, num_components, bit_size);
4358
4359 return vec;
4360 }
4361
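/* Return the transpose of a matrix-valued SSA value, caching the result so
 * that transposing it again gives back the original.
 */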
4362 struct vtn_ssa_value *
4363 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
4364 {
4365 if (src->transposed)
4366 return src->transposed;
4367
4368 struct vtn_ssa_value *dest =
4369 vtn_create_ssa_value(b, glsl_transposed_type(src->type));
4370
4371 for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
4372 if (glsl_type_is_vector_or_scalar(src->type)) {
4373 dest->elems[i]->def = nir_channel(&b->nb, src->def, i);
4374 } else {
4375 unsigned cols = glsl_get_matrix_columns(src->type);
4376 nir_scalar srcs[NIR_MAX_MATRIX_COLUMNS];
4377 for (unsigned j = 0; j < cols; j++) {
4378 srcs[j] = nir_get_scalar(src->elems[j]->def, i);
4379 }
4380 dest->elems[i]->def = nir_vec_scalars(&b->nb, srcs, cols);
4381 }
4382 }
4383
4384 dest->transposed = src;
4385
4386 return dest;
4387 }
4388
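/* Implement OpVectorShuffle: build a vector whose components are selected
 * from the concatenation of src0 and src1, with the literal 0xffffffff
 * selecting an undefined component.
 */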
4389 static nir_def *
4390 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
4391 nir_def *src0, nir_def *src1,
4392 const uint32_t *indices)
4393 {
4394 nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
4395
4396 for (unsigned i = 0; i < num_components; i++) {
4397 uint32_t index = indices[i];
4398 unsigned total_components = src0->num_components + src1->num_components;
4399 vtn_fail_if(index != 0xffffffff && index >= total_components,
4400 "OpVectorShuffle: All Component literals must either be "
4401 "FFFFFFFF or in [0, N - 1] (inclusive)");
4402
4403 if (index == 0xffffffff) {
4404 vec->src[i].src =
4405 nir_src_for_ssa(nir_undef(&b->nb, 1, src0->bit_size));
4406 } else if (index < src0->num_components) {
4407 vec->src[i].src = nir_src_for_ssa(src0);
4408 vec->src[i].swizzle[0] = index;
4409 } else {
4410 vec->src[i].src = nir_src_for_ssa(src1);
4411 vec->src[i].swizzle[0] = index - src0->num_components;
4412 }
4413 }
4414
4415 nir_builder_instr_insert(&b->nb, &vec->instr);
4416
4417 return &vec->def;
4418 }
4419
4420 /*
4421 * Concatenates a number of vectors/scalars together to produce a vector
4422 */
4423 static nir_def *
4424 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
4425 unsigned num_srcs, nir_def **srcs)
4426 {
4427 nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
4428
4429 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4430 *
4431 * "When constructing a vector, there must be at least two Constituent
4432 * operands."
4433 */
4434 vtn_assert(num_srcs >= 2);
4435
4436 unsigned dest_idx = 0;
4437 for (unsigned i = 0; i < num_srcs; i++) {
4438 nir_def *src = srcs[i];
4439 vtn_assert(dest_idx + src->num_components <= num_components);
4440 for (unsigned j = 0; j < src->num_components; j++) {
4441 vec->src[dest_idx].src = nir_src_for_ssa(src);
4442 vec->src[dest_idx].swizzle[0] = j;
4443 dest_idx++;
4444 }
4445 }
4446
4447 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
4448 *
4449 * "When constructing a vector, the total number of components in all
4450 * the operands must equal the number of components in Result Type."
4451 */
4452 vtn_assert(dest_idx == num_components);
4453
4454 nir_builder_instr_insert(&b->nb, &vec->instr);
4455
4456 return &vec->def;
4457 }
4458
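/* Deep-copy a vtn_ssa_value tree; vector/scalar leaves share the underlying
 * nir_def, since SSA values are immutable.
 */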
4459 static struct vtn_ssa_value *
4460 vtn_composite_copy(struct vtn_builder *b, struct vtn_ssa_value *src)
4461 {
4462 assert(!src->is_variable);
4463
4464 struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
4465 dest->type = src->type;
4466
4467 if (glsl_type_is_vector_or_scalar(src->type)) {
4468 dest->def = src->def;
4469 } else {
4470 unsigned elems = glsl_get_length(src->type);
4471
4472 dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
4473 for (unsigned i = 0; i < elems; i++)
4474 dest->elems[i] = vtn_composite_copy(b, src->elems[i]);
4475 }
4476
4477 return dest;
4478 }
4479
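/* Implement OpCompositeInsert: copy the composite and replace the element
 * (or vector component) addressed by the index chain.
 */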
4480 static struct vtn_ssa_value *
4481 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
4482 struct vtn_ssa_value *insert, const uint32_t *indices,
4483 unsigned num_indices)
4484 {
4485 if (glsl_type_is_cmat(src->type))
4486 return vtn_cooperative_matrix_insert(b, src, insert, indices, num_indices);
4487
4488 struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
4489
4490 struct vtn_ssa_value *cur = dest;
4491 unsigned i;
4492 for (i = 0; i < num_indices - 1; i++) {
4493 /* If we got a vector here, that means the next index will be trying to
4494 * dereference a scalar.
4495 */
4496 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
4497 "OpCompositeInsert has too many indices.");
4498 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4499 "All indices in an OpCompositeInsert must be in-bounds");
4500 cur = cur->elems[indices[i]];
4501 }
4502
4503 if (glsl_type_is_vector_or_scalar(cur->type)) {
4504 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4505 "All indices in an OpCompositeInsert must be in-bounds");
4506
4507 /* According to the SPIR-V spec, OpCompositeInsert may work down to
4508 * the component granularity. In that case, the last index will be
4509 * the index to insert the scalar into the vector.
4510 */
4511
4512 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
4513 } else {
4514 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4515 "All indices in an OpCompositeInsert must be in-bounds");
4516 cur->elems[indices[i]] = insert;
4517 }
4518
4519 return dest;
4520 }
4521
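/* Implement OpCompositeExtract: walk the index chain; the final index may
 * select a single component out of a vector.
 */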
4522 static struct vtn_ssa_value *
4523 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
4524 const uint32_t *indices, unsigned num_indices)
4525 {
4526 if (glsl_type_is_cmat(src->type))
4527 return vtn_cooperative_matrix_extract(b, src, indices, num_indices);
4528
4529 struct vtn_ssa_value *cur = src;
4530 for (unsigned i = 0; i < num_indices; i++) {
4531 if (glsl_type_is_vector_or_scalar(cur->type)) {
4532 vtn_assert(i == num_indices - 1);
4533 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4534 "All indices in an OpCompositeExtract must be in-bounds");
4535
4536 /* According to the SPIR-V spec, OpCompositeExtract may work down to
4537 * the component granularity. The last index will be the index of the
4538 * vector to extract.
4539 */
4540
4541 const struct glsl_type *scalar_type =
4542 glsl_scalar_type(glsl_get_base_type(cur->type));
4543 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
4544 ret->def = nir_channel(&b->nb, cur->def, indices[i]);
4545 return ret;
4546 } else {
4547 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4548 "All indices in an OpCompositeExtract must be in-bounds");
4549 cur = cur->elems[indices[i]];
4550 }
4551 }
4552
4553 return cur;
4554 }
4555
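/* Handle the composite instructions: OpVectorExtractDynamic,
 * OpVectorInsertDynamic, OpVectorShuffle, OpCompositeConstruct(ReplicateEXT),
 * OpCompositeExtract, OpCompositeInsert, OpCopyLogical and
 * OpCopyObject/OpExpectKHR.
 */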
4556 static void
4557 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
4558 const uint32_t *w, unsigned count)
4559 {
4560 struct vtn_type *type = vtn_get_type(b, w[1]);
4561 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
4562
4563 switch (opcode) {
4564 case SpvOpVectorExtractDynamic:
4565 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
4566 vtn_get_nir_ssa(b, w[4]));
4567 break;
4568
4569 case SpvOpVectorInsertDynamic:
4570 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
4571 vtn_get_nir_ssa(b, w[4]),
4572 vtn_get_nir_ssa(b, w[5]));
4573 break;
4574
4575 case SpvOpVectorShuffle:
4576 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
4577 vtn_get_nir_ssa(b, w[3]),
4578 vtn_get_nir_ssa(b, w[4]),
4579 w + 5);
4580 break;
4581
4582 case SpvOpCompositeConstruct:
4583 case SpvOpCompositeConstructReplicateEXT: {
4584 unsigned elems = count - 3;
4585 assume(elems >= 1);
4586 if (type->base_type == vtn_base_type_cooperative_matrix) {
4587 vtn_assert(elems == 1);
4588 nir_deref_instr *mat = vtn_create_cmat_temporary(b, type->type, "cmat_construct");
4589 nir_cmat_construct(&b->nb, &mat->def, vtn_get_nir_ssa(b, w[3]));
4590 vtn_set_ssa_value_var(b, ssa, mat->var);
4591 } else if (glsl_type_is_vector_or_scalar(type->type)) {
4592 if (opcode == SpvOpCompositeConstructReplicateEXT) {
4593 nir_def *src = vtn_get_nir_ssa(b, w[3]);
4594 vtn_assert(glsl_get_bit_size(type->type) == src->bit_size);
4595 unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
4596 ssa->def = nir_swizzle(&b->nb, src, swiz,
4597 glsl_get_vector_elements(type->type));
4598 } else {
4599 nir_def *srcs[NIR_MAX_VEC_COMPONENTS];
4600 for (unsigned i = 0; i < elems; i++) {
4601 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4602 vtn_assert(glsl_get_bit_size(type->type) == srcs[i]->bit_size);
4603 }
4604 ssa->def =
4605 vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4606 elems, srcs);
4607 }
4608 } else {
4609 ssa->elems = vtn_alloc_array(b, struct vtn_ssa_value *, type->length);
4610 if (opcode == SpvOpCompositeConstructReplicateEXT) {
4611 struct vtn_ssa_value *elem = vtn_ssa_value(b, w[3]);
4612 for (unsigned i = 0; i < type->length; i++)
4613 ssa->elems[i] = elem;
4614 } else {
4615 vtn_fail_if(elems != type->length,
4616 "%s has %u constituents, expected %u",
4617 spirv_op_to_string(opcode), elems, type->length);
4618 for (unsigned i = 0; i < elems; i++)
4619 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4620 }
4621 }
4622 break;
4623 }
4624 case SpvOpCompositeExtract:
4625 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4626 w + 4, count - 4);
4627 break;
4628
4629 case SpvOpCompositeInsert:
4630 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4631 vtn_ssa_value(b, w[3]),
4632 w + 5, count - 5);
4633 break;
4634
4635 case SpvOpCopyLogical: {
4636 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4637 struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
4638 vtn_assert(vtn_types_compatible(b, type, dst_type));
4639 ssa->type = glsl_get_bare_type(dst_type->type);
4640 break;
4641 }
4642 case SpvOpCopyObject:
4643 case SpvOpExpectKHR:
4644 vtn_copy_value(b, w[3], w[2]);
4645 return;
4646
4647 default:
4648 vtn_fail_with_opcode("unknown composite operation", opcode);
4649 }
4650
4651 vtn_push_ssa_value(b, w[2], ssa);
4652 }
4653
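/* Handle geometry-stream emission (OpEmitVertex and friends) along with
 * OpMemoryBarrier and OpControlBarrier.
 */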
4654 static void
4655 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4656 const uint32_t *w, UNUSED unsigned count)
4657 {
4658 switch (opcode) {
4659 case SpvOpEmitVertex:
4660 case SpvOpEmitStreamVertex:
4661 case SpvOpEndPrimitive:
4662 case SpvOpEndStreamPrimitive: {
4663 unsigned stream = 0;
4664 if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4665 stream = vtn_constant_uint(b, w[1]);
4666
4667 switch (opcode) {
4668 case SpvOpEmitStreamVertex:
4669 case SpvOpEmitVertex:
4670 nir_emit_vertex(&b->nb, stream);
4671 break;
4672 case SpvOpEndPrimitive:
4673 case SpvOpEndStreamPrimitive:
4674 nir_end_primitive(&b->nb, stream);
4675 break;
4676 default:
4677 unreachable("Invalid opcode");
4678 }
4679 break;
4680 }
4681
4682 case SpvOpMemoryBarrier: {
4683 SpvScope scope = vtn_constant_uint(b, w[1]);
4684 SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4685 vtn_emit_memory_barrier(b, scope, semantics);
4686 return;
4687 }
4688
4689 case SpvOpControlBarrier: {
4690 SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4691 SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4692 SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4693
4694 /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
4695 * memory semantics of None for GLSL barrier().
4696 * Before that, prior to commit c3f1cdfa, it emitted OpControlBarrier with
4697 * an execution scope of Device instead of Workgroup.
4698 */
4699 if (b->wa_glslang_cs_barrier &&
4700 b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4701 (execution_scope == SpvScopeWorkgroup ||
4702 execution_scope == SpvScopeDevice) &&
4703 memory_semantics == SpvMemorySemanticsMaskNone) {
4704 execution_scope = SpvScopeWorkgroup;
4705 memory_scope = SpvScopeWorkgroup;
4706 memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4707 SpvMemorySemanticsWorkgroupMemoryMask;
4708 }
4709
4710 /* From the SPIR-V spec:
4711 *
4712 * "When used with the TessellationControl execution model, it also
4713 * implicitly synchronizes the Output Storage Class: Writes to Output
4714 * variables performed by any invocation executed prior to a
4715 * OpControlBarrier will be visible to any other invocation after
4716 * return from that OpControlBarrier."
4717 *
4718 * The same applies to VK_NV_mesh_shader.
4719 */
4720 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4721 b->nb.shader->info.stage == MESA_SHADER_TASK ||
4722 b->nb.shader->info.stage == MESA_SHADER_MESH) {
4723 memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4724 SpvMemorySemanticsReleaseMask |
4725 SpvMemorySemanticsAcquireReleaseMask |
4726 SpvMemorySemanticsSequentiallyConsistentMask);
4727 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4728 SpvMemorySemanticsOutputMemoryMask;
4729 if (memory_scope == SpvScopeSubgroup || memory_scope == SpvScopeInvocation)
4730 memory_scope = SpvScopeWorkgroup;
4731 }
4732
4733 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4734 memory_semantics);
4735 break;
4736 }
4737
4738 default:
4739 unreachable("unknown barrier instruction");
4740 }
4741 }
4742
4743 static enum tess_primitive_mode
4744 tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b,
4745 SpvExecutionMode mode)
4746 {
4747 switch (mode) {
4748 case SpvExecutionModeTriangles:
4749 return TESS_PRIMITIVE_TRIANGLES;
4750 case SpvExecutionModeQuads:
4751 return TESS_PRIMITIVE_QUADS;
4752 case SpvExecutionModeIsolines:
4753 return TESS_PRIMITIVE_ISOLINES;
4754 default:
4755 vtn_fail("Invalid tess primitive type: %s (%u)",
4756 spirv_executionmode_to_string(mode), mode);
4757 }
4758 }
4759
4760 static enum mesa_prim
4761 primitive_from_spv_execution_mode(struct vtn_builder *b,
4762 SpvExecutionMode mode)
4763 {
4764 switch (mode) {
4765 case SpvExecutionModeInputPoints:
4766 case SpvExecutionModeOutputPoints:
4767 return MESA_PRIM_POINTS;
4768 case SpvExecutionModeInputLines:
4769 case SpvExecutionModeOutputLinesNV:
4770 return MESA_PRIM_LINES;
4771 case SpvExecutionModeInputLinesAdjacency:
4772 return MESA_PRIM_LINES_ADJACENCY;
4773 case SpvExecutionModeTriangles:
4774 case SpvExecutionModeOutputTrianglesNV:
4775 return MESA_PRIM_TRIANGLES;
4776 case SpvExecutionModeInputTrianglesAdjacency:
4777 return MESA_PRIM_TRIANGLES_ADJACENCY;
4778 case SpvExecutionModeQuads:
4779 return MESA_PRIM_QUADS;
4780 case SpvExecutionModeOutputLineStrip:
4781 return MESA_PRIM_LINE_STRIP;
4782 case SpvExecutionModeOutputTriangleStrip:
4783 return MESA_PRIM_TRIANGLE_STRIP;
4784 default:
4785 vtn_fail("Invalid primitive type: %s (%u)",
4786 spirv_executionmode_to_string(mode), mode);
4787 }
4788 }
4789
4790 static unsigned
4791 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4792 SpvExecutionMode mode)
4793 {
4794 switch (mode) {
4795 case SpvExecutionModeInputPoints:
4796 return 1;
4797 case SpvExecutionModeInputLines:
4798 return 2;
4799 case SpvExecutionModeInputLinesAdjacency:
4800 return 4;
4801 case SpvExecutionModeTriangles:
4802 return 3;
4803 case SpvExecutionModeInputTrianglesAdjacency:
4804 return 6;
4805 default:
4806 vtn_fail("Invalid GS input mode: %s (%u)",
4807 spirv_executionmode_to_string(mode), mode);
4808 }
4809 }
4810
4811 gl_shader_stage
4812 vtn_stage_for_execution_model(SpvExecutionModel model)
4813 {
4814 switch (model) {
4815 case SpvExecutionModelVertex:
4816 return MESA_SHADER_VERTEX;
4817 case SpvExecutionModelTessellationControl:
4818 return MESA_SHADER_TESS_CTRL;
4819 case SpvExecutionModelTessellationEvaluation:
4820 return MESA_SHADER_TESS_EVAL;
4821 case SpvExecutionModelGeometry:
4822 return MESA_SHADER_GEOMETRY;
4823 case SpvExecutionModelFragment:
4824 return MESA_SHADER_FRAGMENT;
4825 case SpvExecutionModelGLCompute:
4826 return MESA_SHADER_COMPUTE;
4827 case SpvExecutionModelKernel:
4828 return MESA_SHADER_KERNEL;
4829 case SpvExecutionModelRayGenerationKHR:
4830 return MESA_SHADER_RAYGEN;
4831 case SpvExecutionModelAnyHitKHR:
4832 return MESA_SHADER_ANY_HIT;
4833 case SpvExecutionModelClosestHitKHR:
4834 return MESA_SHADER_CLOSEST_HIT;
4835 case SpvExecutionModelMissKHR:
4836 return MESA_SHADER_MISS;
4837 case SpvExecutionModelIntersectionKHR:
4838 return MESA_SHADER_INTERSECTION;
4839 case SpvExecutionModelCallableKHR:
4840 return MESA_SHADER_CALLABLE;
4841 case SpvExecutionModelTaskNV:
4842 case SpvExecutionModelTaskEXT:
4843 return MESA_SHADER_TASK;
4844 case SpvExecutionModelMeshNV:
4845 case SpvExecutionModelMeshEXT:
4846 return MESA_SHADER_MESH;
4847 default:
4848 return MESA_SHADER_NONE;
4849 }
4850 }
4851
4852 void
4853 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4854 unsigned count)
4855 {
4856 struct vtn_value *entry_point = &b->values[w[2]];
4857 /* Record the name even if this turns out not to be the entry point we want. */
4858 unsigned name_words;
4859 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4860
4861 gl_shader_stage stage = vtn_stage_for_execution_model(w[1]);
4862 vtn_fail_if(stage == MESA_SHADER_NONE,
4863 "Unsupported execution model: %s (%u)",
4864 spirv_executionmodel_to_string(w[1]), w[1]);
4865 if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4866 stage != b->entry_point_stage)
4867 return;
4868
4869 vtn_assert(b->entry_point == NULL);
4870 b->entry_point = entry_point;
4871
4872 /* Entry points enumerate which global variables are used. */
4873 size_t start = 3 + name_words;
4874 b->interface_ids_count = count - start;
4875 b->interface_ids = vtn_alloc_array(b, uint32_t, b->interface_ids_count);
4876 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
4877 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
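/* Sorting the copy up front lets later interface-membership checks use a
 * binary search (an assumption about the consumer; this function itself
 * only guarantees the IDs end up sorted by cmp_uint32_t).
 */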
4878 }
4879
4880 static bool
4881 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4882 const uint32_t *w, unsigned count)
4883 {
4884 switch (opcode) {
4885 case SpvOpString:
4886 case SpvOpSource:
4887 case SpvOpSourceExtension:
4888 case SpvOpSourceContinued:
4889 case SpvOpModuleProcessed:
4890 vtn_handle_debug_text(b, opcode, w, count);
4891 break;
4892
4893 case SpvOpExtension: {
4894 /* Implementing both NV_mesh_shader and EXT_mesh_shader
4895 * is difficult without knowing which we're dealing with.
4896 * TODO: remove this when we stop supporting NV_mesh_shader.
4897 */
4898 const char *ext_name = (const char *)&w[1];
4899 if (strcmp(ext_name, "SPV_NV_mesh_shader") == 0)
4900 b->shader->info.mesh.nv = true;
4901 break;
4902 }
4903
4904 case SpvOpCapability: {
4905 SpvCapability cap = w[1];
4906 switch (cap) {
4907 case SpvCapabilitySubgroupDispatch:
4908 /* Missing:
4909 * - SpvOpGetKernelLocalSizeForSubgroupCount
4910 * - SpvOpGetKernelMaxNumSubgroups
4911 */
4912 vtn_warn("Not fully supported capability: %s",
4913 spirv_capability_to_string(cap));
4914 break;
4915
4916 default:
4917 vtn_fail_if(!spirv_capabilities_get(&implemented_capabilities, cap),
4918 "Unimplemented SPIR-V capability: %s (%u)",
4919 spirv_capability_to_string(cap), cap);
4920 }
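/* Capabilities spirv_to_nir cannot translate are a hard error, while
 * capabilities the driver simply does not advertise only warn (presumably
 * because the shader may never actually exercise them). Either way,
 * everything the module declares is recorded below.
 */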
4921
4922 if (!spirv_capabilities_get(&b->supported_capabilities, cap)) {
4923 vtn_warn("Unsupported SPIR-V capability: %s (%u)",
4924 spirv_capability_to_string(cap), cap);
4925 }
4926
4927 spirv_capabilities_set(&b->enabled_capabilities, cap, true);
4928 break;
4929 }
4930
4931 case SpvOpExtInstImport:
4932 vtn_handle_extension(b, opcode, w, count);
4933 break;
4934
4935 case SpvOpMemoryModel:
4936 switch (w[1]) {
4937 case SpvAddressingModelPhysical32:
4938 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4939 "AddressingModelPhysical32 only supported for kernels");
4940 b->shader->info.cs.ptr_size = 32;
4941 b->physical_ptrs = true;
4942 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
4943 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4944 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
4945 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4946 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
4947 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
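/* For example, nir_address_format_32bit_global for the global address
 * space would satisfy these checks (illustrative only; the actual formats
 * come from the spirv_to_nir options supplied by the driver).
 */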
4948 break;
4949 case SpvAddressingModelPhysical64:
4950 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4951 "AddressingModelPhysical64 only supported for kernels");
4952 b->shader->info.cs.ptr_size = 64;
4953 b->physical_ptrs = true;
4954 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
4955 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4956 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
4957 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4958 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
4959 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4960 break;
4961 case SpvAddressingModelLogical:
4962 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
4963 "AddressingModelLogical only supported for shaders");
4964 b->physical_ptrs = false;
4965 break;
4966 case SpvAddressingModelPhysicalStorageBuffer64:
4967 vtn_fail_if(!b->supported_capabilities.PhysicalStorageBufferAddresses,
4968 "AddressingModelPhysicalStorageBuffer64 not supported");
4969 break;
4970 default:
4971 vtn_fail("Unknown addressing model: %s (%u)",
4972 spirv_addressingmodel_to_string(w[1]), w[1]);
4973 break;
4974 }
4975
4976 b->mem_model = w[2];
4977 switch (w[2]) {
4978 case SpvMemoryModelSimple:
4979 case SpvMemoryModelGLSL450:
4980 case SpvMemoryModelOpenCL:
4981 break;
4982 case SpvMemoryModelVulkan:
4983 vtn_fail_if(!b->supported_capabilities.VulkanMemoryModel,
4984 "Vulkan memory model is unsupported by this driver");
4985 break;
4986 default:
4987 vtn_fail("Unsupported memory model: %s",
4988 spirv_memorymodel_to_string(w[2]));
4989 break;
4990 }
4991 break;
4992
4993 case SpvOpEntryPoint:
4994 vtn_handle_entry_point(b, w, count);
4995 break;
4996
4997 case SpvOpName:
4998 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
4999 break;
5000
5001 case SpvOpMemberName:
5002 case SpvOpExecutionMode:
5003 case SpvOpExecutionModeId:
5004 case SpvOpDecorationGroup:
5005 case SpvOpDecorate:
5006 case SpvOpDecorateId:
5007 case SpvOpMemberDecorate:
5008 case SpvOpGroupDecorate:
5009 case SpvOpGroupMemberDecorate:
5010 case SpvOpDecorateString:
5011 case SpvOpMemberDecorateString:
5012 vtn_handle_decoration(b, opcode, w, count);
5013 break;
5014
5015 case SpvOpExtInst:
5016 case SpvOpExtInstWithForwardRefsKHR: {
5017 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5018 if (val->ext_handler == vtn_handle_non_semantic_instruction) {
5019 /* NonSemantic extended instructions are acceptable in preamble. */
5020 vtn_handle_non_semantic_instruction(b, w[4], w, count);
5021 return true;
5022 } else {
5023 return false; /* End of preamble. */
5024 }
5025 }
5026
5027 default:
5028 return false; /* End of preamble */
5029 }
5030
5031 return true;
5032 }
5033
5034 void
5035 vtn_handle_debug_text(struct vtn_builder *b, SpvOp opcode,
5036 const uint32_t *w, unsigned count)
5037 {
5038 switch (opcode) {
5039 case SpvOpString:
5040 vtn_push_value(b, w[1], vtn_value_type_string)->str =
5041 vtn_string_literal(b, &w[2], count - 2, NULL);
5042 break;
5043
5044 case SpvOpSource: {
5045 const char *lang;
5046 switch (w[1]) {
5047 default:
5048 case SpvSourceLanguageUnknown: lang = "unknown"; break;
5049 case SpvSourceLanguageESSL: lang = "ESSL"; break;
5050 case SpvSourceLanguageGLSL: lang = "GLSL"; break;
5051 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break;
5052 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break;
5053 case SpvSourceLanguageHLSL: lang = "HLSL"; break;
5054 }
5055
5056 uint32_t version = w[2];
5057
5058 const char *file =
5059 (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
5060
5061 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
5062
5063 b->source_lang = w[1];
5064 break;
5065 }
5066
5067 case SpvOpSourceExtension:
5068 case SpvOpSourceContinued:
5069 case SpvOpModuleProcessed:
5070 /* Unhandled, but these are for debug so that's ok. */
5071 break;
5072
5073 default:
5074 unreachable("Unhandled opcode");
5075 }
5076 }
5077
5078 static void
5079 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
5080 const struct vtn_decoration *mode, UNUSED void *data)
5081 {
5082 vtn_assert(b->entry_point == entry_point);
5083
5084 switch(mode->exec_mode) {
5085 case SpvExecutionModeOriginUpperLeft:
5086 case SpvExecutionModeOriginLowerLeft:
5087 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5088 b->shader->info.fs.origin_upper_left =
5089 (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
5090 break;
5091
5092 case SpvExecutionModeEarlyFragmentTests:
5093 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5094 b->shader->info.fs.early_fragment_tests = true;
5095 break;
5096
5097 case SpvExecutionModePostDepthCoverage:
5098 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5099 b->shader->info.fs.post_depth_coverage = true;
5100 break;
5101
5102 case SpvExecutionModeInvocations:
5103 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5104 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
5105 break;
5106
5107 case SpvExecutionModeDepthReplacing:
5108 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5109 if (b->shader->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
5110 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
5111 break;
5112 case SpvExecutionModeDepthGreater:
5113 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5114 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
5115 break;
5116 case SpvExecutionModeDepthLess:
5117 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5118 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
5119 break;
5120 case SpvExecutionModeDepthUnchanged:
5121 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5122 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
5123 break;
5124
5125 case SpvExecutionModeLocalSizeHint:
5126 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5127 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
5128 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
5129 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
5130 break;
5131
5132 case SpvExecutionModeLocalSize:
5133 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5134 b->shader->info.workgroup_size[0] = mode->operands[0];
5135 b->shader->info.workgroup_size[1] = mode->operands[1];
5136 b->shader->info.workgroup_size[2] = mode->operands[2];
5137 } else {
5138 vtn_fail("Execution mode LocalSize not supported in stage %s",
5139 _mesa_shader_stage_to_string(b->shader->info.stage));
5140 }
5141 break;
5142
5143 case SpvExecutionModeOutputVertices:
5144 switch (b->shader->info.stage) {
5145 case MESA_SHADER_TESS_CTRL:
5146 case MESA_SHADER_TESS_EVAL:
5147 b->shader->info.tess.tcs_vertices_out = mode->operands[0];
5148 break;
5149 case MESA_SHADER_GEOMETRY:
5150 b->shader->info.gs.vertices_out = mode->operands[0];
5151 break;
5152 case MESA_SHADER_MESH:
5153 b->shader->info.mesh.max_vertices_out = mode->operands[0];
5154 break;
5155 default:
5156 vtn_fail("Execution mode OutputVertices not supported in stage %s",
5157 _mesa_shader_stage_to_string(b->shader->info.stage));
5158 break;
5159 }
5160 break;
5161
5162 case SpvExecutionModeInputPoints:
5163 case SpvExecutionModeInputLines:
5164 case SpvExecutionModeInputLinesAdjacency:
5165 case SpvExecutionModeTriangles:
5166 case SpvExecutionModeInputTrianglesAdjacency:
5167 case SpvExecutionModeQuads:
5168 case SpvExecutionModeIsolines:
5169 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5170 b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5171 b->shader->info.tess._primitive_mode =
5172 tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode);
5173 } else {
5174 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5175 b->shader->info.gs.vertices_in =
5176 vertices_in_from_spv_execution_mode(b, mode->exec_mode);
5177 b->shader->info.gs.input_primitive =
5178 primitive_from_spv_execution_mode(b, mode->exec_mode);
5179 }
5180 break;
5181
5182 case SpvExecutionModeOutputPrimitivesNV:
5183 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5184 b->shader->info.mesh.max_primitives_out = mode->operands[0];
5185 break;
5186
5187 case SpvExecutionModeOutputLinesNV:
5188 case SpvExecutionModeOutputTrianglesNV:
5189 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5190 b->shader->info.mesh.primitive_type =
5191 primitive_from_spv_execution_mode(b, mode->exec_mode);
5192 break;
5193
5194 case SpvExecutionModeOutputPoints: {
5195 const unsigned primitive =
5196 primitive_from_spv_execution_mode(b, mode->exec_mode);
5197
5198 switch (b->shader->info.stage) {
5199 case MESA_SHADER_GEOMETRY:
5200 b->shader->info.gs.output_primitive = primitive;
5201 break;
5202 case MESA_SHADER_MESH:
5203 b->shader->info.mesh.primitive_type = primitive;
5204 break;
5205 default:
5206 vtn_fail("Execution mode OutputPoints not supported in stage %s",
5207 _mesa_shader_stage_to_string(b->shader->info.stage));
5208 break;
5209 }
5210 break;
5211 }
5212
5213 case SpvExecutionModeOutputLineStrip:
5214 case SpvExecutionModeOutputTriangleStrip:
5215 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5216 b->shader->info.gs.output_primitive =
5217 primitive_from_spv_execution_mode(b, mode->exec_mode);
5218 break;
5219
5220 case SpvExecutionModeSpacingEqual:
5221 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5222 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5223 b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5224 break;
5225 case SpvExecutionModeSpacingFractionalEven:
5226 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5227 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5228 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5229 break;
5230 case SpvExecutionModeSpacingFractionalOdd:
5231 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5232 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5233 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5234 break;
5235 case SpvExecutionModeVertexOrderCw:
5236 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5237 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5238 b->shader->info.tess.ccw = false;
5239 break;
5240 case SpvExecutionModeVertexOrderCcw:
5241 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5242 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5243 b->shader->info.tess.ccw = true;
5244 break;
5245 case SpvExecutionModePointMode:
5246 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5247 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5248 b->shader->info.tess.point_mode = true;
5249 break;
5250
5251 case SpvExecutionModePixelCenterInteger:
5252 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5253 b->shader->info.fs.pixel_center_integer = true;
5254 break;
5255
5256 case SpvExecutionModeXfb:
5257 b->shader->info.has_transform_feedback_varyings = true;
5258 break;
5259
5260 case SpvExecutionModeVecTypeHint:
5261 break; /* OpenCL */
5262
5263 case SpvExecutionModeContractionOff:
5264 if (b->shader->info.stage != MESA_SHADER_KERNEL)
5265 vtn_warn("ExectionMode only allowed for CL-style kernels: %s",
5266 spirv_executionmode_to_string(mode->exec_mode));
5267 else
5268 b->exact = true;
5269 break;
5270
5271 case SpvExecutionModeStencilRefReplacingEXT:
5272 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5273 break;
5274
5275 case SpvExecutionModeDerivativeGroupQuadsKHR:
5276 vtn_assert(gl_shader_stage_uses_workgroup(b->shader->info.stage));
5277 b->shader->info.derivative_group = DERIVATIVE_GROUP_QUADS;
5278 break;
5279
5280 case SpvExecutionModeDerivativeGroupLinearKHR:
5281 vtn_assert(gl_shader_stage_uses_workgroup(b->shader->info.stage));
5282 b->shader->info.derivative_group = DERIVATIVE_GROUP_LINEAR;
5283 break;
5284
5285 case SpvExecutionModePixelInterlockOrderedEXT:
5286 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5287 b->shader->info.fs.pixel_interlock_ordered = true;
5288 break;
5289
5290 case SpvExecutionModePixelInterlockUnorderedEXT:
5291 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5292 b->shader->info.fs.pixel_interlock_unordered = true;
5293 break;
5294
5295 case SpvExecutionModeSampleInterlockOrderedEXT:
5296 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5297 b->shader->info.fs.sample_interlock_ordered = true;
5298 break;
5299
5300 case SpvExecutionModeSampleInterlockUnorderedEXT:
5301 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5302 b->shader->info.fs.sample_interlock_unordered = true;
5303 break;
5304
5305 case SpvExecutionModeDenormPreserve:
5306 case SpvExecutionModeDenormFlushToZero:
5307 case SpvExecutionModeSignedZeroInfNanPreserve:
5308 case SpvExecutionModeRoundingModeRTE:
5309 case SpvExecutionModeRoundingModeRTZ: {
5310 unsigned execution_mode = 0;
5311 switch (mode->exec_mode) {
5312 case SpvExecutionModeDenormPreserve:
5313 switch (mode->operands[0]) {
5314 case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5315 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5316 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5317 default: vtn_fail("Floating point type not supported");
5318 }
5319 break;
5320 case SpvExecutionModeDenormFlushToZero:
5321 switch (mode->operands[0]) {
5322 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5323 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5324 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5325 default: vtn_fail("Floating point type not supported");
5326 }
5327 break;
5328 case SpvExecutionModeSignedZeroInfNanPreserve:
5329 switch (mode->operands[0]) {
5330 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5331 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5332 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5333 default: vtn_fail("Floating point type not supported");
5334 }
5335 break;
5336 case SpvExecutionModeRoundingModeRTE:
5337 switch (mode->operands[0]) {
5338 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5339 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5340 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5341 default: vtn_fail("Floating point type not supported");
5342 }
5343 break;
5344 case SpvExecutionModeRoundingModeRTZ:
5345 switch (mode->operands[0]) {
5346 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5347 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5348 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5349 default: vtn_fail("Floating point type not supported");
5350 }
5351 break;
5352 default:
5353 break;
5354 }
5355
5356 b->shader->info.float_controls_execution_mode |= execution_mode;
5357
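/* A module that declared, say, both DenormPreserve 32 and
 * DenormFlushToZero 32 would have set both FP32 bits above; reject such
 * contradictory combinations per bit size.
 */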
5358 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5359 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5360 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5361 "Cannot flush to zero and preserve denorms for the same bit size.");
5362 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5363 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5364 "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5365 }
5366 break;
5367 }
5368
5369 case SpvExecutionModeMaximallyReconvergesKHR:
5370 b->shader->info.maximally_reconverges = true;
5371 break;
5372
5373 case SpvExecutionModeLocalSizeId:
5374 case SpvExecutionModeLocalSizeHintId:
5375 case SpvExecutionModeSubgroupsPerWorkgroupId:
5376 case SpvExecutionModeFPFastMathDefault:
5377 case SpvExecutionModeMaxNodeRecursionAMDX:
5378 case SpvExecutionModeStaticNumWorkgroupsAMDX:
5379 case SpvExecutionModeMaxNumWorkgroupsAMDX:
5380 case SpvExecutionModeShaderIndexAMDX:
5381 /* Handled later by vtn_handle_execution_mode_id(). */
5382 break;
5383
5384 case SpvExecutionModeSubgroupSize:
5385 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5386 vtn_assert(b->shader->info.subgroup_size == SUBGROUP_SIZE_VARYING);
5387 b->shader->info.subgroup_size = mode->operands[0];
5388 break;
5389
5390 case SpvExecutionModeSubgroupsPerWorkgroup:
5391 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5392 b->shader->info.num_subgroups = mode->operands[0];
5393 break;
5394
5395 case SpvExecutionModeSubgroupUniformControlFlowKHR:
5396 /* Nothing to do here */
5397 break;
5398
5399 case SpvExecutionModeEarlyAndLateFragmentTestsAMD:
5400 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5401 b->shader->info.fs.early_and_late_fragment_tests = true;
5402 break;
5403
5404 case SpvExecutionModeStencilRefGreaterFrontAMD:
5405 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5406 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_GREATER;
5407 break;
5408
5409 case SpvExecutionModeStencilRefLessFrontAMD:
5410 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5411 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_LESS;
5412 break;
5413
5414 case SpvExecutionModeStencilRefUnchangedFrontAMD:
5415 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5416 b->shader->info.fs.stencil_front_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5417 break;
5418
5419 case SpvExecutionModeStencilRefGreaterBackAMD:
5420 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5421 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_GREATER;
5422 break;
5423
5424 case SpvExecutionModeStencilRefLessBackAMD:
5425 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5426 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_LESS;
5427 break;
5428
5429 case SpvExecutionModeStencilRefUnchangedBackAMD:
5430 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5431 b->shader->info.fs.stencil_back_layout = FRAG_STENCIL_LAYOUT_UNCHANGED;
5432 break;
5433
5434 case SpvExecutionModeRequireFullQuadsKHR:
5435 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5436 b->shader->info.fs.require_full_quads = true;
5437 break;
5438
5439 case SpvExecutionModeQuadDerivativesKHR:
5440 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5441 b->shader->info.fs.quad_derivatives = true;
5442 break;
5443
5444 case SpvExecutionModeCoalescingAMDX:
5445 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5446 b->shader->info.cs.workgroup_count[0] = 1;
5447 b->shader->info.cs.workgroup_count[1] = 1;
5448 b->shader->info.cs.workgroup_count[2] = 1;
5449 break;
5450
5451 default:
5452 vtn_fail("Unhandled execution mode: %s (%u)",
5453 spirv_executionmode_to_string(mode->exec_mode),
5454 mode->exec_mode);
5455 }
5456 }
5457
5458 static void
5459 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5460 const struct vtn_decoration *mode, UNUSED void *data)
5461 {
5462
5463 vtn_assert(b->entry_point == entry_point);
5464
5465 switch (mode->exec_mode) {
5466 case SpvExecutionModeLocalSizeId:
5467 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5468 b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5469 b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5470 b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5471 } else {
5472 vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5473 _mesa_shader_stage_to_string(b->shader->info.stage));
5474 }
5475 break;
5476
5477 case SpvExecutionModeLocalSizeHintId:
5478 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5479 b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5480 b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5481 b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5482 break;
5483
5484 case SpvExecutionModeSubgroupsPerWorkgroupId:
5485 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5486 b->shader->info.num_subgroups = vtn_constant_uint(b, mode->operands[0]);
5487 break;
5488
5489 case SpvExecutionModeFPFastMathDefault: {
5490 struct vtn_type *type = vtn_get_type(b, mode->operands[0]);
5491 SpvFPFastMathModeMask flags = vtn_constant_uint(b, mode->operands[1]);
5492
5493 SpvFPFastMathModeMask can_fast_math =
5494 SpvFPFastMathModeAllowRecipMask |
5495 SpvFPFastMathModeAllowContractMask |
5496 SpvFPFastMathModeAllowReassocMask |
5497 SpvFPFastMathModeAllowTransformMask;
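/* Anything short of the full set of fast-math permissions above makes us
 * conservatively mark subsequently emitted ALU instructions as exact.
 */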
5498 if ((flags & can_fast_math) != can_fast_math)
5499 b->exact = true;
5500
5501 unsigned execution_mode = 0;
5502 if (!(flags & SpvFPFastMathModeNotNaNMask)) {
5503 switch (glsl_get_bit_size(type->type)) {
5504 case 16: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP16; break;
5505 case 32: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP32; break;
5506 case 64: execution_mode |= FLOAT_CONTROLS_NAN_PRESERVE_FP64; break;
5507 }
5508 }
5509 if (!(flags & SpvFPFastMathModeNotInfMask)) {
5510 switch (glsl_get_bit_size(type->type)) {
5511 case 16: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP16; break;
5512 case 32: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP32; break;
5513 case 64: execution_mode |= FLOAT_CONTROLS_INF_PRESERVE_FP64; break;
5514 }
5515 }
5516 if (!(flags & SpvFPFastMathModeNSZMask)) {
5517 switch (glsl_get_bit_size(type->type)) {
5518 case 16: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16; break;
5519 case 32: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32; break;
5520 case 64: execution_mode |= FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64; break;
5521 }
5522 }
5523 b->shader->info.float_controls_execution_mode |= execution_mode;
5524 break;
5525 }
5526
5527 case SpvExecutionModeMaxNodeRecursionAMDX:
5528 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5529 break;
5530
5531 case SpvExecutionModeStaticNumWorkgroupsAMDX:
5532 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5533 b->shader->info.cs.workgroup_count[0] = vtn_constant_uint(b, mode->operands[0]);
5534 b->shader->info.cs.workgroup_count[1] = vtn_constant_uint(b, mode->operands[1]);
5535 b->shader->info.cs.workgroup_count[2] = vtn_constant_uint(b, mode->operands[2]);
5536 assert(b->shader->info.cs.workgroup_count[0]);
5537 assert(b->shader->info.cs.workgroup_count[1]);
5538 assert(b->shader->info.cs.workgroup_count[2]);
5539 break;
5540
5541 case SpvExecutionModeMaxNumWorkgroupsAMDX:
5542 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5543 break;
5544
5545 case SpvExecutionModeShaderIndexAMDX:
5546 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5547 b->shader->info.cs.shader_index = vtn_constant_uint(b, mode->operands[0]);
5548 break;
5549
5550 default:
5551 /* Nothing to do. Literal execution modes already handled by
5552 * vtn_handle_execution_mode(). */
5553 break;
5554 }
5555 }
5556
5557 static bool
5558 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5559 const uint32_t *w, unsigned count)
5560 {
5561 vtn_set_instruction_result_type(b, opcode, w, count);
5562
5563 switch (opcode) {
5564 case SpvOpSource:
5565 case SpvOpSourceContinued:
5566 case SpvOpSourceExtension:
5567 case SpvOpExtension:
5568 case SpvOpCapability:
5569 case SpvOpExtInstImport:
5570 case SpvOpMemoryModel:
5571 case SpvOpEntryPoint:
5572 case SpvOpExecutionMode:
5573 case SpvOpString:
5574 case SpvOpName:
5575 case SpvOpMemberName:
5576 case SpvOpDecorationGroup:
5577 case SpvOpDecorate:
5578 case SpvOpDecorateId:
5579 case SpvOpMemberDecorate:
5580 case SpvOpGroupDecorate:
5581 case SpvOpGroupMemberDecorate:
5582 case SpvOpDecorateString:
5583 case SpvOpMemberDecorateString:
5584 vtn_fail("Invalid opcode types and variables section");
5585 break;
5586
5587 case SpvOpTypeVoid:
5588 case SpvOpTypeBool:
5589 case SpvOpTypeInt:
5590 case SpvOpTypeFloat:
5591 case SpvOpTypeVector:
5592 case SpvOpTypeMatrix:
5593 case SpvOpTypeImage:
5594 case SpvOpTypeSampler:
5595 case SpvOpTypeSampledImage:
5596 case SpvOpTypeArray:
5597 case SpvOpTypeRuntimeArray:
5598 case SpvOpTypeStruct:
5599 case SpvOpTypeOpaque:
5600 case SpvOpTypePointer:
5601 case SpvOpTypeForwardPointer:
5602 case SpvOpTypeFunction:
5603 case SpvOpTypeEvent:
5604 case SpvOpTypeDeviceEvent:
5605 case SpvOpTypeReserveId:
5606 case SpvOpTypeQueue:
5607 case SpvOpTypePipe:
5608 case SpvOpTypeAccelerationStructureKHR:
5609 case SpvOpTypeRayQueryKHR:
5610 case SpvOpTypeCooperativeMatrixKHR:
5611 vtn_handle_type(b, opcode, w, count);
5612 break;
5613
5614 case SpvOpConstantTrue:
5615 case SpvOpConstantFalse:
5616 case SpvOpConstant:
5617 case SpvOpConstantComposite:
5618 case SpvOpConstantCompositeReplicateEXT:
5619 case SpvOpConstantNull:
5620 case SpvOpSpecConstantTrue:
5621 case SpvOpSpecConstantFalse:
5622 case SpvOpSpecConstant:
5623 case SpvOpSpecConstantComposite:
5624 case SpvOpSpecConstantCompositeReplicateEXT:
5625 case SpvOpSpecConstantOp:
5626 vtn_handle_constant(b, opcode, w, count);
5627 break;
5628
5629 case SpvOpUndef:
5630 case SpvOpVariable:
5631 case SpvOpConstantSampler:
5632 vtn_handle_variables(b, opcode, w, count);
5633 break;
5634
5635 case SpvOpExtInst:
5636 case SpvOpExtInstWithForwardRefsKHR: {
5637 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5638 /* NonSemantic extended instructions are acceptable in preamble, others
5639 * will indicate the end of preamble.
5640 */
5641 return val->ext_handler == vtn_handle_non_semantic_instruction;
5642 }
5643
5644 default:
5645 return false; /* End of preamble */
5646 }
5647
5648 return true;
5649 }
5650
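/* Recursively lowers OpSelect: vectors and scalars map to a single bcsel,
 * aggregates select element-wise, and variable-backed values are handled
 * with an if/else that copies the chosen source into a fresh local.
 */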
5651 static struct vtn_ssa_value *
5652 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5653 struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5654 {
5655 struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
5656 dest->type = src1->type;
5657
5658 if (src1->is_variable || src2->is_variable) {
5659 vtn_assert(src1->is_variable && src2->is_variable);
5660
5661 nir_variable *dest_var =
5662 nir_local_variable_create(b->nb.impl, dest->type, "var_select");
5663 nir_deref_instr *dest_deref = nir_build_deref_var(&b->nb, dest_var);
5664
5665 nir_push_if(&b->nb, src0->def);
5666 {
5667 nir_deref_instr *src1_deref = vtn_get_deref_for_ssa_value(b, src1);
5668 vtn_local_store(b, vtn_local_load(b, src1_deref, 0), dest_deref, 0);
5669 }
5670 nir_push_else(&b->nb, NULL);
5671 {
5672 nir_deref_instr *src2_deref = vtn_get_deref_for_ssa_value(b, src2);
5673 vtn_local_store(b, vtn_local_load(b, src2_deref, 0), dest_deref, 0);
5674 }
5675 nir_pop_if(&b->nb, NULL);
5676
5677 vtn_set_ssa_value_var(b, dest, dest_var);
5678 } else if (glsl_type_is_vector_or_scalar(src1->type)) {
5679 dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5680 } else {
5681 unsigned elems = glsl_get_length(src1->type);
5682
5683 dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
5684 for (unsigned i = 0; i < elems; i++) {
5685 dest->elems[i] = vtn_nir_select(b, src0,
5686 src1->elems[i], src2->elems[i]);
5687 }
5688 }
5689
5690 return dest;
5691 }
5692
5693 static void
5694 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5695 const uint32_t *w, unsigned count)
5696 {
5697 /* Handle OpSelect up-front here because it needs to be able to handle
5698 * pointers and not just regular vectors and scalars.
5699 */
5700 struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5701 struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5702 struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5703 struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5704
5705 vtn_fail_if(obj1_val->type != res_val->type ||
5706 obj2_val->type != res_val->type,
5707 "Object types must match the result type in OpSelect (%%%u = %%%u ? %%%u : %%%u)", w[2], w[3], w[4], w[5]);
5708
5709 vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5710 cond_val->type->base_type != vtn_base_type_vector) ||
5711 !glsl_type_is_boolean(cond_val->type->type),
5712 "OpSelect must have either a vector of booleans or "
5713 "a boolean as Condition type");
5714
5715 vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5716 (res_val->type->base_type != vtn_base_type_vector ||
5717 res_val->type->length != cond_val->type->length),
5718 "When Condition type in OpSelect is a vector, the Result "
5719 "type must be a vector of the same length");
5720
5721 switch (res_val->type->base_type) {
5722 case vtn_base_type_scalar:
5723 case vtn_base_type_vector:
5724 case vtn_base_type_matrix:
5725 case vtn_base_type_array:
5726 case vtn_base_type_struct:
5727 /* OK. */
5728 break;
5729 case vtn_base_type_pointer:
5730 /* We need to have actual storage for pointer types. */
5731 vtn_fail_if(res_val->type->type == NULL,
5732 "Invalid pointer result type for OpSelect");
5733 break;
5734 default:
5735 vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5736 }
5737
5738 vtn_push_ssa_value(b, w[2],
5739 vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5740 vtn_ssa_value(b, w[4]),
5741 vtn_ssa_value(b, w[5])));
5742 }
5743
5744 static void
5745 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5746 const uint32_t *w, unsigned count)
5747 {
5748 struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5749 struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5750 vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5751 type2->base_type != vtn_base_type_pointer,
5752 "%s operands must have pointer types",
5753 spirv_op_to_string(opcode));
5754 vtn_fail_if(type1->storage_class != type2->storage_class,
5755 "%s operands must have the same storage class",
5756 spirv_op_to_string(opcode));
5757
5758 struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5759 const struct glsl_type *type = vtn_type->type;
5760
5761 nir_address_format addr_format = vtn_mode_to_address_format(
5762 b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5763
5764 nir_def *def;
5765
5766 switch (opcode) {
5767 case SpvOpPtrDiff: {
5768 /* OpPtrDiff returns the difference in number of elements (not byte offset). */
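/* Worked example: if both pointers reference vec4 elements (natural size
 * 16 bytes) and their addresses differ by 32 bytes, the result is 2.
 */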
5769 unsigned elem_size, elem_align;
5770 glsl_get_natural_size_align_bytes(type1->pointed->type,
5771 &elem_size, &elem_align);
5772
5773 def = nir_build_addr_isub(&b->nb,
5774 vtn_get_nir_ssa(b, w[3]),
5775 vtn_get_nir_ssa(b, w[4]),
5776 addr_format);
5777 def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5778 def = nir_i2iN(&b->nb, def, glsl_get_bit_size(type));
5779 break;
5780 }
5781
5782 case SpvOpPtrEqual:
5783 case SpvOpPtrNotEqual: {
5784 def = nir_build_addr_ieq(&b->nb,
5785 vtn_get_nir_ssa(b, w[3]),
5786 vtn_get_nir_ssa(b, w[4]),
5787 addr_format);
5788 if (opcode == SpvOpPtrNotEqual)
5789 def = nir_inot(&b->nb, def);
5790 break;
5791 }
5792
5793 default:
5794 unreachable("Invalid ptr operation");
5795 }
5796
5797 vtn_push_nir_ssa(b, w[2], def);
5798 }
5799
5800 static void
5801 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5802 const uint32_t *w, unsigned count)
5803 {
5804 nir_intrinsic_instr *intrin;
5805
5806 switch (opcode) {
5807 case SpvOpTraceNV:
5808 case SpvOpTraceRayKHR: {
5809 intrin = nir_intrinsic_instr_create(b->nb.shader,
5810 nir_intrinsic_trace_ray);
5811
5812 /* The sources are in the same order in the NIR intrinsic */
5813 for (unsigned i = 0; i < 10; i++)
5814 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5815
5816 nir_deref_instr *payload;
5817 if (opcode == SpvOpTraceNV)
5818 payload = vtn_get_call_payload_for_location(b, w[11]);
5819 else
5820 payload = vtn_nir_deref(b, w[11]);
5821 intrin->src[10] = nir_src_for_ssa(&payload->def);
5822 nir_builder_instr_insert(&b->nb, &intrin->instr);
5823 break;
5824 }
5825
5826 case SpvOpReportIntersectionKHR: {
5827 intrin = nir_intrinsic_instr_create(b->nb.shader,
5828 nir_intrinsic_report_ray_intersection);
5829 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5830 intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5831 nir_def_init(&intrin->instr, &intrin->def, 1, 1);
5832 nir_builder_instr_insert(&b->nb, &intrin->instr);
5833 vtn_push_nir_ssa(b, w[2], &intrin->def);
5834 break;
5835 }
5836
5837 case SpvOpIgnoreIntersectionNV:
5838 intrin = nir_intrinsic_instr_create(b->nb.shader,
5839 nir_intrinsic_ignore_ray_intersection);
5840 nir_builder_instr_insert(&b->nb, &intrin->instr);
5841 break;
5842
5843 case SpvOpTerminateRayNV:
5844 intrin = nir_intrinsic_instr_create(b->nb.shader,
5845 nir_intrinsic_terminate_ray);
5846 nir_builder_instr_insert(&b->nb, &intrin->instr);
5847 break;
5848
5849 case SpvOpExecuteCallableNV:
5850 case SpvOpExecuteCallableKHR: {
5851 intrin = nir_intrinsic_instr_create(b->nb.shader,
5852 nir_intrinsic_execute_callable);
5853 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5854 nir_deref_instr *payload;
5855 if (opcode == SpvOpExecuteCallableNV)
5856 payload = vtn_get_call_payload_for_location(b, w[2]);
5857 else
5858 payload = vtn_nir_deref(b, w[2]);
5859 intrin->src[1] = nir_src_for_ssa(&payload->def);
5860 nir_builder_instr_insert(&b->nb, &intrin->instr);
5861 break;
5862 }
5863
5864 default:
5865 vtn_fail_with_opcode("Unhandled opcode", opcode);
5866 }
5867 }
5868
5869 static void
5870 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5871 const uint32_t *w, unsigned count)
5872 {
5873 vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5874
5875 /* TODO(mesh): Use or create a primitive that allows the unpacking to
5876 * happen in the backend. What we have here is functional but too
5877 * blunt.
5878 */
5879
5880 struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
5881 vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
5882 offset_type->type != glsl_uint_type(),
5883 "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
5884 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5885
5886 struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
5887 vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
5888 packed_type->type != glsl_uint_type(),
5889 "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
5890 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5891
5892 nir_deref_instr *indices = NULL;
5893 nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
5894 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
5895 indices = nir_build_deref_var(&b->nb, var);
5896 break;
5897 }
5898 }
5899
5900 /* It may be the case that the variable is not present in the
5901 * entry point interface list.
5902 *
5903 * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
5904 */
5905
5906 if (!indices) {
5907 unsigned vertices_per_prim =
5908 mesa_vertices_per_prim(b->shader->info.mesh.primitive_type);
5909 unsigned max_prim_indices =
5910 vertices_per_prim * b->shader->info.mesh.max_primitives_out;
5911 const struct glsl_type *t =
5912 glsl_array_type(glsl_uint_type(), max_prim_indices, 0);
5913 nir_variable *var =
5914 nir_variable_create(b->shader, nir_var_shader_out, t,
5915 "gl_PrimitiveIndicesNV");
5916
5917 var->data.location = VARYING_SLOT_PRIMITIVE_INDICES;
5918 var->data.interpolation = INTERP_MODE_NONE;
5919 indices = nir_build_deref_var(&b->nb, var);
5920 }
5921
5922 nir_def *offset = vtn_get_nir_ssa(b, w[1]);
5923 nir_def *packed = vtn_get_nir_ssa(b, w[2]);
5924 nir_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
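/* Each byte of the packed word becomes one index, e.g. Packed Indices
 * 0x03020100 with Index Offset 4 writes 0, 1, 2, 3 into
 * gl_PrimitiveIndicesNV[4..7] (assuming little-endian component order
 * from nir_unpack_bits).
 */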
5925 for (int i = 0; i < 4; i++) {
5926 nir_deref_instr *offset_deref =
5927 nir_build_deref_array(&b->nb, indices,
5928 nir_iadd_imm(&b->nb, offset, i));
5929 nir_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i));
5930
5931 nir_store_deref(&b->nb, offset_deref, val, 0x1);
5932 }
5933 }
5934
5935 struct ray_query_value {
5936 nir_ray_query_value nir_value;
5937 const struct glsl_type *glsl_type;
5938 };
5939
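/* Maps each SpvOpRayQueryGet* opcode to the NIR ray-query value it reads
 * and the GLSL type of the result, so the load helper below can emit the
 * intrinsic with the right number of components and bit size.
 */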
5940 static struct ray_query_value
5941 spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b,
5942 SpvOp opcode)
5943 {
5944 switch (opcode) {
5945 #define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv: \
5946 return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type }
5947 CASE(RayTMinKHR, tmin, glsl_floatN_t_type(32));
5948 CASE(RayFlagsKHR, flags, glsl_uint_type());
5949 CASE(WorldRayDirectionKHR, world_ray_direction, glsl_vec_type(3));
5950 CASE(WorldRayOriginKHR, world_ray_origin, glsl_vec_type(3));
5951 CASE(IntersectionTypeKHR, intersection_type, glsl_uint_type());
5952 CASE(IntersectionTKHR, intersection_t, glsl_floatN_t_type(32));
5953 CASE(IntersectionInstanceCustomIndexKHR, intersection_instance_custom_index, glsl_int_type());
5954 CASE(IntersectionInstanceIdKHR, intersection_instance_id, glsl_int_type());
5955 CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index, glsl_uint_type());
5956 CASE(IntersectionGeometryIndexKHR, intersection_geometry_index, glsl_int_type());
5957 CASE(IntersectionPrimitiveIndexKHR, intersection_primitive_index, glsl_int_type());
5958 CASE(IntersectionBarycentricsKHR, intersection_barycentrics, glsl_vec_type(2));
5959 CASE(IntersectionFrontFaceKHR, intersection_front_face, glsl_bool_type());
5960 CASE(IntersectionCandidateAABBOpaqueKHR, intersection_candidate_aabb_opaque, glsl_bool_type());
5961 CASE(IntersectionObjectToWorldKHR, intersection_object_to_world, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5962 CASE(IntersectionWorldToObjectKHR, intersection_world_to_object, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5963 CASE(IntersectionObjectRayOriginKHR, intersection_object_ray_origin, glsl_vec_type(3));
5964 CASE(IntersectionObjectRayDirectionKHR, intersection_object_ray_direction, glsl_vec_type(3));
5965 CASE(IntersectionTriangleVertexPositionsKHR, intersection_triangle_vertex_positions, glsl_array_type(glsl_vec_type(3), 3,
5966 glsl_get_explicit_stride(glsl_vec_type(3))));
5967 #undef CASE
5968 default:
5969 vtn_fail_with_opcode("Unhandled opcode", opcode);
5970 }
5971 }
5972
5973 static void
5974 ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode,
5975 const uint32_t *w, nir_def *src0,
5976 bool committed)
5977 {
5978 struct ray_query_value value =
5979 spirv_to_nir_type_ray_query_intrinsic(b, opcode);
5980
5981 if (glsl_type_is_array_or_matrix(value.glsl_type)) {
5982 const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type);
5983 const unsigned elems = glsl_get_length(value.glsl_type);
5984
5985 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type);
5986 for (unsigned i = 0; i < elems; i++) {
5987 ssa->elems[i]->def =
5988 nir_rq_load(&b->nb,
5989 glsl_get_vector_elements(elem_type),
5990 glsl_get_bit_size(elem_type),
5991 src0,
5992 .ray_query_value = value.nir_value,
5993 .committed = committed,
5994 .column = i);
5995 }
5996
5997 vtn_push_ssa_value(b, w[2], ssa);
5998 } else {
5999 assert(glsl_type_is_vector_or_scalar(value.glsl_type));
6000
6001 vtn_push_nir_ssa(b, w[2],
6002 nir_rq_load(&b->nb,
6003 glsl_get_vector_elements(value.glsl_type),
6004 glsl_get_bit_size(value.glsl_type),
6005 src0,
6006 .ray_query_value = value.nir_value,
6007 .committed = committed));
6008 }
6009 }
6010
6011 static void
6012 vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode,
6013 const uint32_t *w, unsigned count)
6014 {
6015 switch (opcode) {
6016 case SpvOpRayQueryInitializeKHR: {
6017 nir_intrinsic_instr *intrin =
6018 nir_intrinsic_instr_create(b->nb.shader,
6019 nir_intrinsic_rq_initialize);
6020 /* The sources are in the same order in the NIR intrinsic */
6021 for (unsigned i = 0; i < 8; i++)
6022 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
6023 nir_builder_instr_insert(&b->nb, &intrin->instr);
6024 break;
6025 }
6026
6027 case SpvOpRayQueryTerminateKHR:
6028 nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def);
6029 break;
6030
6031 case SpvOpRayQueryProceedKHR:
6032 vtn_push_nir_ssa(b, w[2],
6033 nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def));
6034 break;
6035
6036 case SpvOpRayQueryGenerateIntersectionKHR:
6037 nir_rq_generate_intersection(&b->nb,
6038 vtn_ssa_value(b, w[1])->def,
6039 vtn_ssa_value(b, w[2])->def);
6040 break;
6041
6042 case SpvOpRayQueryConfirmIntersectionKHR:
6043 nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def);
6044 break;
6045
6046 case SpvOpRayQueryGetIntersectionTKHR:
6047 case SpvOpRayQueryGetIntersectionTypeKHR:
6048 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6049 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6050 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6051 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6052 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6053 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6054 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6055 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6056 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6057 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6058 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6059 case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6060 ray_query_load_intrinsic_create(b, opcode, w,
6061 vtn_ssa_value(b, w[3])->def,
6062 vtn_constant_uint(b, w[4]));
6063 break;
6064
6065 case SpvOpRayQueryGetRayTMinKHR:
6066 case SpvOpRayQueryGetRayFlagsKHR:
6067 case SpvOpRayQueryGetWorldRayDirectionKHR:
6068 case SpvOpRayQueryGetWorldRayOriginKHR:
6069 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6070 ray_query_load_intrinsic_create(b, opcode, w,
6071 vtn_ssa_value(b, w[3])->def,
6072 /* Committed value is ignored for these */
6073 false);
6074 break;
6075
6076 default:
6077 vtn_fail_with_opcode("Unhandled opcode", opcode);
6078 }
6079 }
6080
6081 static void
6082 vtn_handle_initialize_node_payloads(struct vtn_builder *b, SpvOp opcode,
6083 const uint32_t *w, unsigned count)
6084 {
6085 vtn_assert(opcode == SpvOpInitializeNodePayloadsAMDX);
6086
6087 nir_def *payloads = vtn_ssa_value(b, w[1])->def;
6088 mesa_scope scope = vtn_translate_scope(b, vtn_constant_uint(b, w[2]));
6089 nir_def *payload_count = vtn_ssa_value(b, w[3])->def;
6090 nir_def *node_index = vtn_ssa_value(b, w[4])->def;
6091
6092 nir_initialize_node_payloads(&b->nb, payloads, payload_count, node_index, .execution_scope = scope);
6093 }
6094
6095 static bool
6096 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
6097 const uint32_t *w, unsigned count)
6098 {
6099 if (b->options->debug_info) {
6100 nir_debug_info_instr *instr =
6101 nir_debug_info_instr_create(b->shader, nir_debug_info_src_loc, 0);
6102 instr->src_loc.spirv_offset = b->spirv_offset;
6103 instr->src_loc.source = nir_debug_info_spirv;
6104
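/* Cache one nir string per file name so repeated instructions from the
 * same file reuse a single nir_build_string emitted at the start of the
 * current function.
 */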
6105 if (b->file) {
6106 nir_def *filename;
6107 struct hash_entry *he = _mesa_hash_table_search(b->strings, b->file);
6108 if (he) {
6109 filename = he->data;
6110 } else {
6111 nir_builder _b = nir_builder_at(nir_before_cf_list(&b->nb.impl->body));
6112 filename = nir_build_string(&_b, b->file);
6113 _mesa_hash_table_insert(b->strings, b->file, filename);
6114 }
6115
6116 instr->src_loc.filename = nir_src_for_ssa(filename);
6117 /* Make sure line is at least 1 since 0 is reserved for spirv_offset-only
6118 * source locations.
6119 */
6120 instr->src_loc.line = MAX2(b->line, 1);
6121 instr->src_loc.column = b->col;
6122 }
6123
6124 nir_builder_instr_insert(&b->nb, &instr->instr);
6125 }
6126
6127 switch (opcode) {
6128 case SpvOpLabel:
6129 break;
6130
6131 case SpvOpLoopMerge:
6132 case SpvOpSelectionMerge:
6133 /* This is handled by cfg pre-pass and walk_blocks */
6134 break;
6135
6136 case SpvOpUndef: {
6137 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
6138 val->type = vtn_get_type(b, w[1]);
6139 break;
6140 }
6141
6142 case SpvOpExtInst:
6143 case SpvOpExtInstWithForwardRefsKHR:
6144 vtn_handle_extension(b, opcode, w, count);
6145 break;
6146
6147 case SpvOpVariable:
6148 case SpvOpLoad:
6149 case SpvOpStore:
6150 case SpvOpCopyMemory:
6151 case SpvOpCopyMemorySized:
6152 case SpvOpAccessChain:
6153 case SpvOpPtrAccessChain:
6154 case SpvOpInBoundsAccessChain:
6155 case SpvOpInBoundsPtrAccessChain:
6156 case SpvOpArrayLength:
6157 case SpvOpConvertPtrToU:
6158 case SpvOpConvertUToPtr:
6159 case SpvOpGenericCastToPtrExplicit:
6160 case SpvOpGenericPtrMemSemantics:
6161 case SpvOpSubgroupBlockReadINTEL:
6162 case SpvOpSubgroupBlockWriteINTEL:
6163 case SpvOpConvertUToAccelerationStructureKHR:
6164 vtn_handle_variables(b, opcode, w, count);
6165 break;
6166
6167 case SpvOpFunctionCall:
6168 vtn_handle_function_call(b, opcode, w, count);
6169 break;
6170
6171 case SpvOpSampledImage:
6172 case SpvOpImage:
6173 case SpvOpImageSparseTexelsResident:
6174 case SpvOpImageSampleImplicitLod:
6175 case SpvOpImageSparseSampleImplicitLod:
6176 case SpvOpImageSampleExplicitLod:
6177 case SpvOpImageSparseSampleExplicitLod:
6178 case SpvOpImageSampleDrefImplicitLod:
6179 case SpvOpImageSparseSampleDrefImplicitLod:
6180 case SpvOpImageSampleDrefExplicitLod:
6181 case SpvOpImageSparseSampleDrefExplicitLod:
6182 case SpvOpImageSampleProjImplicitLod:
6183 case SpvOpImageSampleProjExplicitLod:
6184 case SpvOpImageSampleProjDrefImplicitLod:
6185 case SpvOpImageSampleProjDrefExplicitLod:
6186 case SpvOpImageFetch:
6187 case SpvOpImageSparseFetch:
6188 case SpvOpImageGather:
6189 case SpvOpImageSparseGather:
6190 case SpvOpImageDrefGather:
6191 case SpvOpImageSparseDrefGather:
6192 case SpvOpImageQueryLod:
6193 vtn_handle_texture(b, opcode, w, count);
6194 break;
6195
6196 case SpvOpImageRead:
6197 case SpvOpImageSparseRead:
6198 case SpvOpImageWrite:
6199 case SpvOpImageTexelPointer:
6200 case SpvOpImageQueryFormat:
6201 case SpvOpImageQueryOrder:
6202 vtn_handle_image(b, opcode, w, count);
6203 break;
6204
6205 case SpvOpImageQueryLevels:
6206 case SpvOpImageQuerySamples:
6207 case SpvOpImageQuerySizeLod:
6208 case SpvOpImageQuerySize: {
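/* These queries are legal on both storage images and sampled images, so
 * dispatch on the underlying GLSL type of the operand.
 */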
6209 struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
6210 vtn_assert(image_type->base_type == vtn_base_type_image);
6211 if (glsl_type_is_image(image_type->glsl_image)) {
6212 vtn_handle_image(b, opcode, w, count);
6213 } else {
6214 vtn_assert(glsl_type_is_texture(image_type->glsl_image));
6215 vtn_handle_texture(b, opcode, w, count);
6216 }
6217 break;
6218 }
6219
6220 case SpvOpFragmentMaskFetchAMD:
6221 case SpvOpFragmentFetchAMD:
6222 vtn_handle_texture(b, opcode, w, count);
6223 break;
6224
6225 case SpvOpAtomicLoad:
6226 case SpvOpAtomicExchange:
6227 case SpvOpAtomicCompareExchange:
6228 case SpvOpAtomicCompareExchangeWeak:
6229 case SpvOpAtomicIIncrement:
6230 case SpvOpAtomicIDecrement:
6231 case SpvOpAtomicIAdd:
6232 case SpvOpAtomicISub:
6233 case SpvOpAtomicSMin:
6234 case SpvOpAtomicUMin:
6235 case SpvOpAtomicSMax:
6236 case SpvOpAtomicUMax:
6237 case SpvOpAtomicAnd:
6238 case SpvOpAtomicOr:
6239 case SpvOpAtomicXor:
6240 case SpvOpAtomicFAddEXT:
6241 case SpvOpAtomicFMinEXT:
6242 case SpvOpAtomicFMaxEXT:
6243 case SpvOpAtomicFlagTestAndSet: {
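/* Atomics can target either image texels or memory; pick the handler
 * based on whether the pointer operand is an image pointer.
 */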
6244 struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
6245 if (pointer->value_type == vtn_value_type_image_pointer) {
6246 vtn_handle_image(b, opcode, w, count);
6247 } else {
6248 vtn_assert(pointer->value_type == vtn_value_type_pointer);
6249 vtn_handle_atomics(b, opcode, w, count);
6250 }
6251 break;
6252 }
6253
6254 case SpvOpAtomicStore:
6255 case SpvOpAtomicFlagClear: {
6256 struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
6257 if (pointer->value_type == vtn_value_type_image_pointer) {
6258 vtn_handle_image(b, opcode, w, count);
6259 } else {
6260 vtn_assert(pointer->value_type == vtn_value_type_pointer);
6261 vtn_handle_atomics(b, opcode, w, count);
6262 }
6263 break;
6264 }
6265
6266 case SpvOpSelect:
6267 vtn_handle_select(b, opcode, w, count);
6268 break;
6269
6270 case SpvOpSNegate:
6271 case SpvOpFNegate:
6272 case SpvOpNot:
6273 case SpvOpAny:
6274 case SpvOpAll:
6275 case SpvOpConvertFToU:
6276 case SpvOpConvertFToS:
6277 case SpvOpConvertSToF:
6278 case SpvOpConvertUToF:
6279 case SpvOpUConvert:
6280 case SpvOpSConvert:
6281 case SpvOpFConvert:
6282 case SpvOpQuantizeToF16:
6283 case SpvOpSatConvertSToU:
6284 case SpvOpSatConvertUToS:
6285 case SpvOpPtrCastToGeneric:
6286 case SpvOpGenericCastToPtr:
6287 case SpvOpIsNan:
6288 case SpvOpIsInf:
6289 case SpvOpIsFinite:
6290 case SpvOpIsNormal:
6291 case SpvOpSignBitSet:
6292 case SpvOpLessOrGreater:
6293 case SpvOpOrdered:
6294 case SpvOpUnordered:
6295 case SpvOpIAdd:
6296 case SpvOpFAdd:
6297 case SpvOpISub:
6298 case SpvOpFSub:
6299 case SpvOpIMul:
6300 case SpvOpFMul:
6301 case SpvOpUDiv:
6302 case SpvOpSDiv:
6303 case SpvOpFDiv:
6304 case SpvOpUMod:
6305 case SpvOpSRem:
6306 case SpvOpSMod:
6307 case SpvOpFRem:
6308 case SpvOpFMod:
6309 case SpvOpVectorTimesScalar:
6310 case SpvOpDot:
6311 case SpvOpIAddCarry:
6312 case SpvOpISubBorrow:
6313 case SpvOpUMulExtended:
6314 case SpvOpSMulExtended:
6315 case SpvOpShiftRightLogical:
6316 case SpvOpShiftRightArithmetic:
6317 case SpvOpShiftLeftLogical:
6318 case SpvOpLogicalEqual:
6319 case SpvOpLogicalNotEqual:
6320 case SpvOpLogicalOr:
6321 case SpvOpLogicalAnd:
6322 case SpvOpLogicalNot:
6323 case SpvOpBitwiseOr:
6324 case SpvOpBitwiseXor:
6325 case SpvOpBitwiseAnd:
6326 case SpvOpIEqual:
6327 case SpvOpFOrdEqual:
6328 case SpvOpFUnordEqual:
6329 case SpvOpINotEqual:
6330 case SpvOpFOrdNotEqual:
6331 case SpvOpFUnordNotEqual:
6332 case SpvOpULessThan:
6333 case SpvOpSLessThan:
6334 case SpvOpFOrdLessThan:
6335 case SpvOpFUnordLessThan:
6336 case SpvOpUGreaterThan:
6337 case SpvOpSGreaterThan:
6338 case SpvOpFOrdGreaterThan:
6339 case SpvOpFUnordGreaterThan:
6340 case SpvOpULessThanEqual:
6341 case SpvOpSLessThanEqual:
6342 case SpvOpFOrdLessThanEqual:
6343 case SpvOpFUnordLessThanEqual:
6344 case SpvOpUGreaterThanEqual:
6345 case SpvOpSGreaterThanEqual:
6346 case SpvOpFOrdGreaterThanEqual:
6347 case SpvOpFUnordGreaterThanEqual:
6348 case SpvOpDPdx:
6349 case SpvOpDPdy:
6350 case SpvOpFwidth:
6351 case SpvOpDPdxFine:
6352 case SpvOpDPdyFine:
6353 case SpvOpFwidthFine:
6354 case SpvOpDPdxCoarse:
6355 case SpvOpDPdyCoarse:
6356 case SpvOpFwidthCoarse:
6357 case SpvOpBitFieldInsert:
6358 case SpvOpBitFieldSExtract:
6359 case SpvOpBitFieldUExtract:
6360 case SpvOpBitReverse:
6361 case SpvOpBitCount:
6362 case SpvOpTranspose:
6363 case SpvOpOuterProduct:
6364 case SpvOpMatrixTimesScalar:
6365 case SpvOpVectorTimesMatrix:
6366 case SpvOpMatrixTimesVector:
6367 case SpvOpMatrixTimesMatrix:
6368 case SpvOpUCountLeadingZerosINTEL:
6369 case SpvOpUCountTrailingZerosINTEL:
6370 case SpvOpAbsISubINTEL:
6371 case SpvOpAbsUSubINTEL:
6372 case SpvOpIAddSatINTEL:
6373 case SpvOpUAddSatINTEL:
6374 case SpvOpIAverageINTEL:
6375 case SpvOpUAverageINTEL:
6376 case SpvOpIAverageRoundedINTEL:
6377 case SpvOpUAverageRoundedINTEL:
6378 case SpvOpISubSatINTEL:
6379 case SpvOpUSubSatINTEL:
6380 case SpvOpIMul32x16INTEL:
6381 case SpvOpUMul32x16INTEL:
6382 vtn_handle_alu(b, opcode, w, count);
6383 break;
6384
6385 case SpvOpSDotKHR:
6386 case SpvOpUDotKHR:
6387 case SpvOpSUDotKHR:
6388 case SpvOpSDotAccSatKHR:
6389 case SpvOpUDotAccSatKHR:
6390 case SpvOpSUDotAccSatKHR:
6391 vtn_handle_integer_dot(b, opcode, w, count);
6392 break;
6393
6394 case SpvOpBitcast:
6395 vtn_handle_bitcast(b, w, count);
6396 break;
6397
6398 /* TODO: One day, we should probably do something with this information.
6399 * For now, though, it's safe to implement these as no-ops.
6400 * Needed for Rusticl SYCL support.
6401 */
6402 case SpvOpAssumeTrueKHR:
6403 break;
6404
6405 case SpvOpExpectKHR:
6406 case SpvOpVectorExtractDynamic:
6407 case SpvOpVectorInsertDynamic:
6408 case SpvOpVectorShuffle:
6409 case SpvOpCompositeConstruct:
6410 case SpvOpCompositeConstructReplicateEXT:
6411 case SpvOpCompositeExtract:
6412 case SpvOpCompositeInsert:
6413 case SpvOpCopyLogical:
6414 case SpvOpCopyObject:
6415 vtn_handle_composite(b, opcode, w, count);
6416 break;
6417
6418 case SpvOpEmitVertex:
6419 case SpvOpEndPrimitive:
6420 case SpvOpEmitStreamVertex:
6421 case SpvOpEndStreamPrimitive:
6422 case SpvOpControlBarrier:
6423 case SpvOpMemoryBarrier:
6424 vtn_handle_barrier(b, opcode, w, count);
6425 break;
6426
6427 case SpvOpGroupNonUniformElect:
6428 case SpvOpGroupNonUniformAll:
6429 case SpvOpGroupNonUniformAny:
6430 case SpvOpGroupNonUniformAllEqual:
6431 case SpvOpGroupNonUniformBroadcast:
6432 case SpvOpGroupNonUniformBroadcastFirst:
6433 case SpvOpGroupNonUniformBallot:
6434 case SpvOpGroupNonUniformInverseBallot:
6435 case SpvOpGroupNonUniformBallotBitExtract:
6436 case SpvOpGroupNonUniformBallotBitCount:
6437 case SpvOpGroupNonUniformBallotFindLSB:
6438 case SpvOpGroupNonUniformBallotFindMSB:
6439 case SpvOpGroupNonUniformShuffle:
6440 case SpvOpGroupNonUniformShuffleXor:
6441 case SpvOpGroupNonUniformShuffleUp:
6442 case SpvOpGroupNonUniformShuffleDown:
6443 case SpvOpGroupNonUniformIAdd:
6444 case SpvOpGroupNonUniformFAdd:
6445 case SpvOpGroupNonUniformIMul:
6446 case SpvOpGroupNonUniformFMul:
6447 case SpvOpGroupNonUniformSMin:
6448 case SpvOpGroupNonUniformUMin:
6449 case SpvOpGroupNonUniformFMin:
6450 case SpvOpGroupNonUniformSMax:
6451 case SpvOpGroupNonUniformUMax:
6452 case SpvOpGroupNonUniformFMax:
6453 case SpvOpGroupNonUniformBitwiseAnd:
6454 case SpvOpGroupNonUniformBitwiseOr:
6455 case SpvOpGroupNonUniformBitwiseXor:
6456 case SpvOpGroupNonUniformLogicalAnd:
6457 case SpvOpGroupNonUniformLogicalOr:
6458 case SpvOpGroupNonUniformLogicalXor:
6459 case SpvOpGroupNonUniformQuadBroadcast:
6460 case SpvOpGroupNonUniformQuadSwap:
6461 case SpvOpGroupNonUniformQuadAllKHR:
6462 case SpvOpGroupNonUniformQuadAnyKHR:
6463 case SpvOpGroupAll:
6464 case SpvOpGroupAny:
6465 case SpvOpGroupBroadcast:
6466 case SpvOpGroupIAdd:
6467 case SpvOpGroupFAdd:
6468 case SpvOpGroupFMin:
6469 case SpvOpGroupUMin:
6470 case SpvOpGroupSMin:
6471 case SpvOpGroupFMax:
6472 case SpvOpGroupUMax:
6473 case SpvOpGroupSMax:
6474 case SpvOpSubgroupBallotKHR:
6475 case SpvOpSubgroupFirstInvocationKHR:
6476 case SpvOpSubgroupReadInvocationKHR:
6477 case SpvOpSubgroupAllKHR:
6478 case SpvOpSubgroupAnyKHR:
6479 case SpvOpSubgroupAllEqualKHR:
6480 case SpvOpGroupIAddNonUniformAMD:
6481 case SpvOpGroupFAddNonUniformAMD:
6482 case SpvOpGroupFMinNonUniformAMD:
6483 case SpvOpGroupUMinNonUniformAMD:
6484 case SpvOpGroupSMinNonUniformAMD:
6485 case SpvOpGroupFMaxNonUniformAMD:
6486 case SpvOpGroupUMaxNonUniformAMD:
6487 case SpvOpGroupSMaxNonUniformAMD:
6488 case SpvOpSubgroupShuffleINTEL:
6489 case SpvOpSubgroupShuffleDownINTEL:
6490 case SpvOpSubgroupShuffleUpINTEL:
6491 case SpvOpSubgroupShuffleXorINTEL:
6492 case SpvOpGroupNonUniformRotateKHR:
6493 vtn_handle_subgroup(b, opcode, w, count);
6494 break;
6495
6496 case SpvOpPtrDiff:
6497 case SpvOpPtrEqual:
6498 case SpvOpPtrNotEqual:
6499 vtn_handle_ptr(b, opcode, w, count);
6500 break;
6501
6502 case SpvOpBeginInvocationInterlockEXT:
6503 nir_begin_invocation_interlock(&b->nb);
6504 break;
6505
6506 case SpvOpEndInvocationInterlockEXT:
6507 nir_end_invocation_interlock(&b->nb);
6508 break;
6509
6510 case SpvOpDemoteToHelperInvocation: {
6511 nir_demote(&b->nb);
6512 break;
6513 }
6514
6515 case SpvOpIsHelperInvocationEXT: {
6516 vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
6517 break;
6518 }
6519
6520 case SpvOpReadClockKHR: {
6521 SpvScope scope = vtn_constant_uint(b, w[3]);
6522 vtn_fail_if(scope != SpvScopeDevice && scope != SpvScopeSubgroup,
6523 "OpReadClockKHR Scope must be either "
6524 "ScopeDevice or ScopeSubgroup.");
6525
6526 /* Operation supports two result types: uvec2 and uint64_t. The NIR
6527 * intrinsic gives uvec2, so pack the result for the other case.
6528 */
6529 nir_def *result = nir_shader_clock(&b->nb, vtn_translate_scope(b, scope));
6530
6531 struct vtn_type *type = vtn_get_type(b, w[1]);
6532 const struct glsl_type *dest_type = type->type;
6533
6534 if (glsl_type_is_vector(dest_type)) {
6535 assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
6536 } else {
6537 assert(glsl_type_is_scalar(dest_type));
6538 assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
6539 result = nir_pack_64_2x32(&b->nb, result);
6540 }
6541
6542 vtn_push_nir_ssa(b, w[2], result);
6543 break;
6544 }
6545
6546 case SpvOpTraceNV:
6547 case SpvOpTraceRayKHR:
6548 case SpvOpReportIntersectionKHR:
6549 case SpvOpIgnoreIntersectionNV:
6550 case SpvOpTerminateRayNV:
6551 case SpvOpExecuteCallableNV:
6552 case SpvOpExecuteCallableKHR:
6553 vtn_handle_ray_intrinsic(b, opcode, w, count);
6554 break;
6555
6556 case SpvOpRayQueryInitializeKHR:
6557 case SpvOpRayQueryTerminateKHR:
6558 case SpvOpRayQueryGenerateIntersectionKHR:
6559 case SpvOpRayQueryConfirmIntersectionKHR:
6560 case SpvOpRayQueryProceedKHR:
6561 case SpvOpRayQueryGetIntersectionTypeKHR:
6562 case SpvOpRayQueryGetRayTMinKHR:
6563 case SpvOpRayQueryGetRayFlagsKHR:
6564 case SpvOpRayQueryGetIntersectionTKHR:
6565 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6566 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6567 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6568 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6569 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6570 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6571 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6572 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6573 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6574 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6575 case SpvOpRayQueryGetWorldRayDirectionKHR:
6576 case SpvOpRayQueryGetWorldRayOriginKHR:
6577 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6578 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6579 case SpvOpRayQueryGetIntersectionTriangleVertexPositionsKHR:
6580 vtn_handle_ray_query_intrinsic(b, opcode, w, count);
6581 break;
6582
6583 case SpvOpLifetimeStart:
6584 case SpvOpLifetimeStop:
6585 break;
6586
6587 case SpvOpGroupAsyncCopy:
6588 case SpvOpGroupWaitEvents:
6589 vtn_handle_opencl_core_instruction(b, opcode, w, count);
6590 break;
6591
6592 case SpvOpWritePackedPrimitiveIndices4x8NV:
6593 vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
6594 break;
6595
6596 case SpvOpSetMeshOutputsEXT:
6597 nir_set_vertex_and_primitive_count(
6598 &b->nb, vtn_get_nir_ssa(b, w[1]), vtn_get_nir_ssa(b, w[2]),
6599 nir_undef(&b->nb, 1, 32));
6600 break;
6601
6602 case SpvOpInitializeNodePayloadsAMDX:
6603 vtn_handle_initialize_node_payloads(b, opcode, w, count);
6604 break;
6605
6606 case SpvOpFinalizeNodePayloadsAMDX:
6607 break;
6608
6609 case SpvOpFinishWritingNodePayloadAMDX:
6610 break;
6611
6612 case SpvOpCooperativeMatrixLoadKHR:
6613 case SpvOpCooperativeMatrixStoreKHR:
6614 case SpvOpCooperativeMatrixLengthKHR:
6615 case SpvOpCooperativeMatrixMulAddKHR:
6616 vtn_handle_cooperative_instruction(b, opcode, w, count);
6617 break;
6618
6619 default:
6620 vtn_fail_with_opcode("Unhandled opcode", opcode);
6621 }
6622
6623 return true;
6624 }
6625
6626 static bool
6627 is_glslang(const struct vtn_builder *b)
6628 {
6629 return b->generator_id == vtn_generator_glslang_reference_front_end ||
6630 b->generator_id == vtn_generator_shaderc_over_glslang;
6631 }
6632
6633 struct vtn_builder*
6634 vtn_create_builder(const uint32_t *words, size_t word_count,
6635 gl_shader_stage stage, const char *entry_point_name,
6636 const struct spirv_to_nir_options *options)
6637 {
6638 /* Initialize the vtn_builder object */
6639 struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6640
6641 b->spirv = words;
6642 b->spirv_word_count = word_count;
6643 b->file = NULL;
6644 b->line = -1;
6645 b->col = -1;
6646 list_inithead(&b->functions);
6647 b->entry_point_stage = stage;
6648 b->entry_point_name = entry_point_name;
6649
6650 /*
6651 * Handle the SPIR-V header (first 5 dwords).
6652 * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
6653 */
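/* The header words are, in order: the magic number, the SPIR-V version,
 * the generator magic, the ID bound, and a reserved schema word that must
 * be zero.
 */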
6654 if (word_count <= 5)
6655 goto fail;
6656
6657 if (words[0] != SpvMagicNumber) {
6658 vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6659 goto fail;
6660 }
6661
6662 b->version = words[1];
6663 if (b->version < 0x10000) {
6664 vtn_err("version was 0x%x, want >= 0x10000", b->version);
6665 goto fail;
6666 }
6667
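/* The generator magic packs the registered tool ID into the upper 16 bits
 * and the tool-specific version into the lower 16 bits.
 */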
6668 b->generator_id = words[2] >> 16;
6669 uint16_t generator_version = words[2];
6670
6671 unsigned value_id_bound = words[3];
6672 if (words[4] != 0) {
6673 vtn_err("words[4] was %u, want 0", words[4]);
6674 goto fail;
6675 }
6676
6677 b->value_id_bound = value_id_bound;
6678
6679 /* Allocate all the data that can be dropped after parsing using
6680 * a cheaper allocation strategy. Use the value_id_bound and the
6681 * size of the common internal structs to approximate a good
6682 * buffer_size.
6683 */
6684 const linear_opts lin_opts = {
6685 .min_buffer_size = 2 * value_id_bound * (sizeof(struct vtn_value) +
6686 sizeof(struct vtn_ssa_value)),
6687 };
6688 b->lin_ctx = linear_context_with_opts(b, &lin_opts);
6689
6690 struct spirv_to_nir_options *dup_options =
6691 vtn_alloc(b, struct spirv_to_nir_options);
6692 *dup_options = *options;
6693
6694 b->options = dup_options;
6695 b->values = vtn_zalloc_array(b, struct vtn_value, value_id_bound);
6696
6697 if (b->options->capabilities != NULL)
6698 b->supported_capabilities = *b->options->capabilities;
6699 else
6700 b->supported_capabilities = implemented_capabilities;
6701
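/* The Linkage capability is only accepted when building a library, since
 * import/export linkage only makes sense across separately compiled
 * modules.
 */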
6702 spirv_capabilities_set(&b->supported_capabilities, SpvCapabilityLinkage,
6703 b->options->create_library);
6704
6705 /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6706 * to provide correct memory semantics on compute shader barrier()
6707 * commands. For modules generated before that fix, we need to fix them
6708 * up ourselves. That GLSLang fix also bumped the generator version to 3.
6709 */
6710 b->wa_glslang_cs_barrier = is_glslang(b) && generator_version < 3;
6711
6712 /* Identifying the LLVM-SPIRV translator:
6713 *
6714 * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6715 * Our use case involving the SPIRV-Tools linker also means we want to
6716 * check for that tool instead. Finally, the SPIRV-Tools linker stores its
6717 * generator ID in the wrong location [2].
6718 *
6719 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6720 * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6721 */
6722 const bool is_llvm_spirv_translator =
6723 (b->generator_id == 0 &&
6724 generator_version == vtn_generator_spirv_tools_linker) ||
6725 b->generator_id == vtn_generator_spirv_tools_linker;
6726
6727 /* The LLVM-SPIRV translator generates Undef initializers for __local
6728 * variables [1].
6729 *
6730 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6731 */
6732 b->wa_llvm_spirv_ignore_workgroup_initializer =
6733 b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6734
6735 /* Older versions of GLSLang would emit OpReturn after
6736 * OpEmitMeshTasksEXT, which is invalid because the latter is already
6737 * a terminator instruction.
6738 *
6739 * See https://github.com/KhronosGroup/glslang/issues/3020 for details.
6740 *
6741 * Clay Shader Compiler (used by GravityMark) is also affected.
6742 */
6743 b->wa_ignore_return_after_emit_mesh_tasks =
6744 (is_glslang(b) && generator_version < 11) ||
6745 (b->generator_id == vtn_generator_clay_shader_compiler &&
6746 generator_version < 18);
6747
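/* Before SPIR-V 1.4, OpEntryPoint only lists Input and Output interface
 * variables, so track the other global variables the entry point uses in
 * order to keep them alive during dead-variable removal (see below).
 */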
6748 if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
6749 b->vars_used_indirectly = _mesa_pointer_set_create(b);
6750
6751 if (b->options->debug_info)
6752 b->strings = _mesa_pointer_hash_table_create(b);
6753
6754 return b;
6755 fail:
6756 ralloc_free(b);
6757 return NULL;
6758 }
6759
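/* Wrap a kernel entry point in a "__wrapped_" function that exposes each
 * SPIR-V parameter as a shader-level variable (uniform, image, or sampler,
 * with Function-storage pointers copied in by value) and then calls the
 * original entry point.
 */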
6760 static nir_function *
6761 vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
6762 nir_function *entry_point)
6763 {
6764 vtn_assert(entry_point == b->entry_point->func->nir_func);
6765 vtn_fail_if(!entry_point->name, "entry points are required to have a name");
6766 const char *func_name =
6767 ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
6768
6769 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
6770
6771 nir_function *main_entry_point = nir_function_create(b->shader, func_name);
6772 nir_function_impl *impl = nir_function_impl_create(main_entry_point);
6773 b->nb = nir_builder_at(nir_after_impl(impl));
6774 b->func_param_idx = 0;
6775
6776 nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
6777
6778 for (unsigned i = 0; i < entry_point->num_params; ++i) {
6779 struct vtn_type *param_type = b->entry_point->func->type->params[i];
6780
6781 b->shader->info.cs.has_variable_shared_mem |=
6782 param_type->storage_class == SpvStorageClassWorkgroup;
6783
6784 /* consider all pointers to function memory to be parameters passed
6785 * by value
6786 */
6787 bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
6788 param_type->storage_class == SpvStorageClassFunction;
6789
6790 /* input variable */
6791 nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
6792
6793 if (is_by_val) {
6794 in_var->data.mode = nir_var_uniform;
6795 in_var->type = param_type->pointed->type;
6796 } else if (param_type->base_type == vtn_base_type_image) {
6797 in_var->data.mode = nir_var_image;
6798 in_var->type = param_type->glsl_image;
6799 in_var->data.access =
6800 spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
6801 } else if (param_type->base_type == vtn_base_type_sampler) {
6802 in_var->data.mode = nir_var_uniform;
6803 in_var->type = glsl_bare_sampler_type();
6804 } else {
6805 in_var->data.mode = nir_var_uniform;
6806 in_var->type = param_type->type;
6807 }
6808
6809 in_var->data.read_only = true;
6810 in_var->data.location = i;
6811
6812 nir_shader_add_variable(b->nb.shader, in_var);
6813
6814 /* we have to copy the entire variable into function memory */
6815 if (is_by_val) {
6816 nir_variable *copy_var =
6817 nir_local_variable_create(impl, in_var->type, "copy_in");
6818 nir_copy_var(&b->nb, copy_var, in_var);
6819 call->params[i] =
6820 nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->def);
6821 } else if (param_type->base_type == vtn_base_type_image ||
6822 param_type->base_type == vtn_base_type_sampler) {
6823 /* Don't load the var, just pass a deref of it */
6824 call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->def);
6825 } else {
6826 call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
6827 }
6828 }
6829
6830 nir_builder_instr_insert(&b->nb, &call->instr);
6831
6832 return main_entry_point;
6833 }
6834
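/* Callback for nir_remove_dead_variables(): a variable may be removed only
 * if the entry point does not reference it indirectly.
 */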
6835 static bool
6836 can_remove(nir_variable *var, void *data)
6837 {
6838 const struct set *vars_used_indirectly = data;
6839 return !_mesa_set_search(vars_used_indirectly, var);
6840 }
6841
6842 nir_shader *
6843 spirv_to_nir(const uint32_t *words, size_t word_count,
6844 struct nir_spirv_specialization *spec, unsigned num_spec,
6845 gl_shader_stage stage, const char *entry_point_name,
6846 const struct spirv_to_nir_options *options,
6847 const nir_shader_compiler_options *nir_options)
6848
6849 {
6850 mesa_spirv_debug_init();
6851
6852 if (MESA_SPIRV_DEBUG(ASM))
6853 spirv_print_asm(stderr, words, word_count);
6854
6855 const uint32_t *word_end = words + word_count;
6856
6857 struct vtn_builder *b = vtn_create_builder(words, word_count,
6858 stage, entry_point_name,
6859 options);
6860
6861 if (b == NULL)
6862 return NULL;
6863
6864 /* See also _vtn_fail() */
6865 if (vtn_setjmp(b->fail_jump)) {
6866 ralloc_free(b);
6867 return NULL;
6868 }
6869
6870 const char *dump_path = secure_getenv("MESA_SPIRV_DUMP_PATH");
6871 if (dump_path)
6872 vtn_dump_shader(b, dump_path, "spirv");
6873
6874 b->shader = nir_shader_create(b, stage, nir_options, NULL);
6875 b->shader->info.subgroup_size = options->subgroup_size;
6876 b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
6877 b->shader->info.cs.shader_index = options->shader_index;
6878 _mesa_blake3_compute(words, word_count * sizeof(uint32_t), b->shader->info.source_blake3);
6879
6880 /* Skip the SPIR-V header, handled at vtn_create_builder */
6881 words += 5;
6882
6883 /* Handle all the preamble instructions */
6884 words = vtn_foreach_instruction(b, words, word_end,
6885 vtn_handle_preamble_instruction);
6886
6887 /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
6888 * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
6889 * demote if the extension is enabled, so we disable this workaround in that
6890 * case.
6891 *
6892 * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
6893 */
6894 bool dxsc = b->generator_id == vtn_generator_spiregg;
6895 b->convert_discard_to_demote = (nir_options->discard_is_demote ||
6896 (dxsc && !b->enabled_capabilities.DemoteToHelperInvocation) ||
6897 (is_glslang(b) && b->source_lang == SpvSourceLanguageHLSL)) &&
6898 b->supported_capabilities.DemoteToHelperInvocation;
6899
6900 if (!options->create_library && b->entry_point == NULL) {
6901 vtn_fail("Entry point not found for %s shader \"%s\"",
6902 _mesa_shader_stage_to_string(stage), entry_point_name);
6903 ralloc_free(b);
6904 return NULL;
6905 }
6906
6907 /* Ensure a sane address mode is being used for function temps */
6908 assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
6909 assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);
6910
6911 /* Set shader info defaults */
6912 if (stage == MESA_SHADER_GEOMETRY)
6913 b->shader->info.gs.invocations = 1;
6914
6915 /* Parse execution modes. */
6916 if (!options->create_library)
6917 vtn_foreach_execution_mode(b, b->entry_point,
6918 vtn_handle_execution_mode, NULL);
6919
6920 b->specializations = spec;
6921 b->num_specializations = num_spec;
6922
6923 /* Handle all variable, type, and constant instructions */
6924 words = vtn_foreach_instruction(b, words, word_end,
6925 vtn_handle_variable_or_type_instruction);
6926
6927 /* Parse execution modes that depend on IDs. Must happen after we have
6928 * constants parsed.
6929 */
6930 if (!options->create_library)
6931 vtn_foreach_execution_mode(b, b->entry_point,
6932 vtn_handle_execution_mode_id, NULL);
6933
6934 if (b->workgroup_size_builtin) {
6935 vtn_assert(gl_shader_stage_uses_workgroup(stage));
6936 vtn_assert(b->workgroup_size_builtin->type->type ==
6937 glsl_vector_type(GLSL_TYPE_UINT, 3));
6938
6939 nir_const_value *const_size =
6940 b->workgroup_size_builtin->constant->values;
6941
6942 b->shader->info.workgroup_size[0] = const_size[0].u32;
6943 b->shader->info.workgroup_size[1] = const_size[1].u32;
6944 b->shader->info.workgroup_size[2] = const_size[2].u32;
6945 }
6946
6947 /* Set types on all vtn_values */
6948 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
6949
6950 vtn_build_cfg(b, words, word_end);
6951
6952 if (!options->create_library) {
6953 assert(b->entry_point->value_type == vtn_value_type_function);
6954 b->entry_point->func->referenced = true;
6955 }
6956
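/* Emit functions until we reach a fixed point: emitting a function body can
 * mark the functions it calls as referenced, and those then need to be
 * emitted on a later iteration.
 */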
6957 bool progress;
6958 do {
6959 progress = false;
6960 vtn_foreach_function(func, &b->functions) {
6961 if ((options->create_library || func->referenced) && !func->emitted) {
6962 _mesa_hash_table_clear(b->strings, NULL);
6963 vtn_function_emit(b, func, vtn_handle_body_instruction);
6964 progress = true;
6965 }
6966 }
6967 } while (progress);
6968
6969 if (!options->create_library) {
6970 vtn_assert(b->entry_point->value_type == vtn_value_type_function);
6971 nir_function *entry_point = b->entry_point->func->nir_func;
6972 vtn_assert(entry_point);
6973
6974 entry_point->dont_inline = false;
6975 /* Post-process entry points that have input parameters. */
6976 if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
6977 entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
6978
6979 entry_point->is_entrypoint = true;
6980 }
6981
6982 if (MESA_SPIRV_DEBUG(VALUES)) {
6983 vtn_dump_values(b, stdout);
6984 }
6985
6986 /* structurize the CFG */
6987 nir_lower_goto_ifs(b->shader);
6988
6989 nir_validate_shader(b->shader, "after spirv cfg");
6990
6991 nir_lower_continue_constructs(b->shader);
6992
6993 /* A SPIR-V module can have multiple shaders stages and also multiple
6994 * shaders of the same stage. Global variables are declared per-module.
6995 *
6996 * Starting in SPIR-V 1.4 the list of global variables is part of
6997 * OpEntryPoint, so only valid ones will be created. Previous versions
6998 * only have Input and Output variables listed, so remove dead variables to
6999 * clean up the remaining ones.
7000 */
7001 if (!options->create_library && b->version < 0x10400) {
7002 const nir_remove_dead_variables_options dead_opts = {
7003 .can_remove_var = can_remove,
7004 .can_remove_var_data = b->vars_used_indirectly,
7005 };
7006 nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
7007 nir_var_shader_out |
7008 nir_var_shader_in |
7009 nir_var_system_value),
7010 b->vars_used_indirectly ? &dead_opts : NULL);
7011 }
7012
7013 nir_foreach_variable_in_shader(var, b->shader) {
7014 switch (var->data.mode) {
7015 case nir_var_mem_ubo:
7016 b->shader->info.num_ubos++;
7017 break;
7018 case nir_var_mem_ssbo:
7019 b->shader->info.num_ssbos++;
7020 break;
7021 case nir_var_mem_push_const:
7022 vtn_assert(b->shader->num_uniforms == 0);
7023 b->shader->num_uniforms =
7024 glsl_get_explicit_size(glsl_without_array(var->type), false);
7025 break;
7026 }
7027 }
7028
7029 /* We sometimes generate bogus derefs that, while never used, give the
7030 * validator a bit of heartburn. Run dead code to get rid of them.
7031 */
7032 nir_opt_dce(b->shader);
7033
7034 /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
7035 * a Block, all of them will be, and Blocks are explicitly laid out.
7036 */
7037 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7038 if (glsl_type_is_interface(var->type)) {
7039 assert(b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR);
7040 b->shader->info.shared_memory_explicit_layout = true;
7041 break;
7042 }
7043 }
7044 if (b->shader->info.shared_memory_explicit_layout) {
7045 unsigned size = 0;
7046 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
7047 assert(glsl_type_is_interface(var->type));
7048 const bool align_to_stride = false;
7049 size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
7050 }
7051 b->shader->info.shared_size = size;
7052 }
7053
7054 if (stage == MESA_SHADER_FRAGMENT) {
7055 /* From the Vulkan 1.2.199 spec:
7056 *
7057 * "If a fragment shader entry point’s interface includes an input
7058 * variable decorated with SamplePosition, Sample Shading is
7059 * considered enabled with a minSampleShading value of 1.0."
7060 *
7061 * Similar text exists for SampleId. Regarding the Sample decoration,
7062 * the Vulkan 1.2.199 spec says:
7063 *
7064 * "If a fragment shader input is decorated with Sample, a separate
7065 * value must be assigned to that variable for each covered sample in
7066 * the fragment, and that value must be sampled at the location of
7067 * the individual sample. When rasterizationSamples is
7068 * VK_SAMPLE_COUNT_1_BIT, the fragment center must be used for
7069 * Centroid, Sample, and undecorated attribute interpolation."
7070 *
7071 * Unfortunately, this isn't quite as clear about static use and the
7072 * interface but the static use check should be valid.
7073 *
7074 * For OpenGL, similar language exists but it's all more wishy-washy.
7075 * We'll assume the same behavior across APIs.
7076 */
7077 nir_foreach_variable_with_modes(var, b->shader,
7078 nir_var_shader_in |
7079 nir_var_system_value) {
7080 struct nir_variable_data *members =
7081 var->members ? var->members : &var->data;
7082 uint16_t num_members = var->members ? var->num_members : 1;
7083 for (uint16_t i = 0; i < num_members; i++) {
7084 if (members[i].mode == nir_var_system_value &&
7085 (members[i].location == SYSTEM_VALUE_SAMPLE_ID ||
7086 members[i].location == SYSTEM_VALUE_SAMPLE_POS))
7087 b->shader->info.fs.uses_sample_shading = true;
7088
7089 if (members[i].mode == nir_var_shader_in && members[i].sample)
7090 b->shader->info.fs.uses_sample_shading = true;
7091 }
7092 }
7093 }
7094
7095 /* Work around applications that declare shader_call_data variables inside
7096 * ray generation shaders or multiple shader_call_data variables in callable
7097 * shaders.
7098 *
7099 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/5326
7100 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11585
7101 */
7102 if (gl_shader_stage_is_rt(b->shader->info.stage))
7103 NIR_PASS(_, b->shader, nir_remove_dead_variables, nir_var_shader_call_data,
7104 NULL);
7105
7106 /* Unparent the shader from the vtn_builder before we delete the builder */
7107 ralloc_steal(NULL, b->shader);
7108
7109 nir_shader *shader = b->shader;
7110 ralloc_free(b);
7111
7112 return shader;
7113 }
7114
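/* Emit a static inline nir_builder wrapper for an exported SPIR-V function.
 * The wrapper looks up (or creates) the nir_function prototype by name,
 * creates a local "return" variable when the function returns a value, and
 * emits a nir_call. As an illustration (the function name and parameters
 * here are hypothetical), an exported "foo" taking two scalars and
 * returning one would produce roughly:
 *
 *    static inline nir_def *
 *    foo(nir_builder *b, nir_def *arg1, nir_def *arg2)
 *    {
 *       ...
 *       nir_call(b, func, &deref->def, arg1, arg2);
 *       return nir_load_deref(b, deref);
 *    }
 */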
7115 static bool
7116 func_to_nir_builder(FILE *fp, struct vtn_function *func)
7117 {
7118 nir_function *nir_func = func->nir_func;
7119 struct vtn_type *return_type = func->type->return_type;
7120 bool returns = return_type->base_type != vtn_base_type_void;
7121
7122 if (returns && return_type->base_type != vtn_base_type_scalar &&
7123 return_type->base_type != vtn_base_type_vector) {
7124 fprintf(stderr, "Unsupported return type for %s\n", nir_func->name);
7125 return false;
7126 }
7127
7128 /* If there is a return type, the first NIR parameter is the return deref,
7129 * so offset by that for logical parameter iteration.
7130 */
7131 unsigned first_param = returns ? 1 : 0;
7132
7133 /* Generate function signature */
7134 fprintf(fp, "static inline %s\n", returns ? "nir_def *": "void");
7135 fprintf(fp, "%s(nir_builder *b", nir_func->name);
7136
7137 /* TODO: Can we recover parameter names? */
7138 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7139 fprintf(fp, ", nir_def *arg%u", i);
7140 }
7141
7142 fprintf(fp, ")\n{\n");
7143
7144 /* Validate inputs. nir_validate will do this too, but the
7145 * errors/backtraces from these asserts should be nicer.
7146 */
7147 for (unsigned i = first_param; i < nir_func->num_params; ++i) {
7148 nir_parameter *param = &nir_func->params[i];
7149 fprintf(fp, " assert(arg%u->bit_size == %u);\n", i, param->bit_size);
7150 fprintf(fp, " assert(arg%u->num_components == %u);\n", i,
7151 param->num_components);
7152 fprintf(fp, "\n");
7153 }
7154
7155 /* Find the function to call. If not found, create a prototype */
7156 fprintf(fp, " nir_function *func = nir_shader_get_function_for_name(b->shader, \"%s\");\n",
7157 nir_func->name);
7158 fprintf(fp, "\n");
7159 fprintf(fp, " if (!func) {\n");
7160 fprintf(fp, " func = nir_function_create(b->shader, \"%s\");\n",
7161 nir_func->name);
7162 fprintf(fp, " func->num_params = %u;\n", nir_func->num_params);
7163 fprintf(fp, " func->params = ralloc_array(b->shader, nir_parameter, func->num_params);\n");
7164
7165 for (unsigned i = 0; i < nir_func->num_params; ++i) {
7166 fprintf(fp, "\n");
7167 fprintf(fp, " func->params[%u].bit_size = %u;\n", i,
7168 nir_func->params[i].bit_size);
7169 fprintf(fp, " func->params[%u].num_components = %u;\n", i,
7170 nir_func->params[i].num_components);
7171 }
7172
7173 fprintf(fp, " }\n\n");
7174
7175
7176 if (returns) {
7177 /* We assume that vec3 variables are lowered to vec4. Mirror that here so
7178 * we don't need to lower vec3 to vec4 again at link-time.
7179 */
7180 assert(glsl_type_is_vector_or_scalar(return_type->type));
7181 unsigned elements = return_type->type->vector_elements;
7182 if (elements == 3)
7183 elements = 4;
7184
7185 /* Reconstruct the return type. */
7186 fprintf(fp, " const struct glsl_type *ret_type = glsl_vector_type(%u, %u);\n",
7187 return_type->type->base_type, elements);
7188
7189 /* With the type, we can make a variable and get a deref to pass in */
7190 fprintf(fp, " nir_variable *ret = nir_local_variable_create(b->impl, ret_type, \"return\");\n");
7191 fprintf(fp, " nir_deref_instr *deref = nir_build_deref_var(b, ret);\n");
7192
7193 /* XXX: This is a hack due to ptr size differing between KERNEL and other
7194 * shader stages. This needs to be fixed in core NIR.
7195 */
7196 fprintf(fp, " deref->def.bit_size = %u;\n", nir_func->params[0].bit_size);
7197 fprintf(fp, "\n");
7198 }
7199
7200 /* Call the function */
7201 fprintf(fp, " nir_call(b, func");
7202
7203 if (returns)
7204 fprintf(fp, ", &deref->def");
7205
7206 for (unsigned i = first_param; i < nir_func->num_params; ++i)
7207 fprintf(fp, ", arg%u", i);
7208
7209 fprintf(fp, ");\n");
7210
7211 /* Load the return value if any, undoing the vec3->vec4 lowering. */
7212 if (returns) {
7213 fprintf(fp, "\n");
7214
7215 if (return_type->type->vector_elements == 3)
7216 fprintf(fp, " return nir_trim_vector(b, nir_load_deref(b, deref), 3);\n");
7217 else
7218 fprintf(fp, " return nir_load_deref(b, deref);\n");
7219 }
7220
7221 fprintf(fp, "}\n\n");
7222 return true;
7223 }
7224
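/* Parse a SPIR-V library far enough to know the signatures of its exported
 * functions (preamble, types, constants, and the CFG, but no function
 * bodies) and write a header of nir_builder wrappers, one per exported
 * function, to the given file.
 */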
7225 bool
7226 spirv_library_to_nir_builder(FILE *fp, const uint32_t *words, size_t word_count,
7227 const struct spirv_to_nir_options *options)
7228 {
7229 #ifndef NDEBUG
7230 mesa_spirv_debug_init();
7231 #endif
7232
7233 const uint32_t *word_end = words + word_count;
7234
7235 struct vtn_builder *b = vtn_create_builder(words, word_count,
7236 MESA_SHADER_KERNEL, "placeholder name",
7237 options);
7238
7239 if (b == NULL)
7240 return false;
7241
7242 /* See also _vtn_fail() */
7243 if (vtn_setjmp(b->fail_jump)) {
7244 ralloc_free(b);
7245 return false;
7246 }
7247
7248 b->shader = nir_shader_create(b, MESA_SHADER_KERNEL,
7249 &(const nir_shader_compiler_options){0}, NULL);
7250
7251 /* Skip the SPIR-V header, handled at vtn_create_builder */
7252 words += 5;
7253
7254 /* Handle all the preamble instructions */
7255 words = vtn_foreach_instruction(b, words, word_end,
7256 vtn_handle_preamble_instruction);
7257
7258 /* Handle all variable, type, and constant instructions */
7259 words = vtn_foreach_instruction(b, words, word_end,
7260 vtn_handle_variable_or_type_instruction);
7261
7262 /* Set types on all vtn_values */
7263 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
7264
7265 vtn_build_cfg(b, words, word_end);
7266
7267 fprintf(fp, "#include \"compiler/nir/nir_builder.h\"\n\n");
7268
7269 vtn_foreach_function(func, &b->functions) {
7270 if (func->linkage != SpvLinkageTypeExport)
7271 continue;
7272
7273 if (!func_to_nir_builder(fp, func))
7274 return false;
7275 }
7276
7277 ralloc_free(b);
7278 return true;
7279 }
7280
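/* Find the SPIR-V result ID that defined the given type by scanning the
 * value table. Returns 0 (never a valid result ID) if the type was not
 * found.
 */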
7281 static unsigned
7282 vtn_id_for_type(struct vtn_builder *b, struct vtn_type *type)
7283 {
7284 for (unsigned i = 0; i < b->value_id_bound; i++) {
7285 struct vtn_value *v = &b->values[i];
7286 if (v->value_type == vtn_value_type_type &&
7287 v->type == type)
7288 return i;
7289 }
7290
7291 return 0;
7292 }
7293
7294 void
7295 vtn_print_value(struct vtn_builder *b, struct vtn_value *val, FILE *f)
7296 {
7297 fprintf(f, "%s", vtn_value_type_to_string(val->value_type));
7298 switch (val->value_type) {
7299 case vtn_value_type_ssa: {
7300 struct vtn_ssa_value *ssa = val->ssa;
7301 fprintf(f, " glsl_type=%s", glsl_get_type_name(ssa->type));
7302 break;
7303 }
7304
7305 case vtn_value_type_constant: {
7306 fprintf(f, " type=%d", vtn_id_for_type(b, val->type));
7307 if (val->is_null_constant)
7308 fprintf(f, " null");
7309 else if (val->is_undef_constant)
7310 fprintf(f, " undef");
7311 break;
7312 }
7313
7314 case vtn_value_type_pointer: {
7315 struct vtn_pointer *pointer = val->pointer;
7316 fprintf(f, " ptr_type=%u", vtn_id_for_type(b, pointer->type));
7317 fprintf(f, " (pointed-)type=%u", vtn_id_for_type(b, val->pointer->type->pointed));
7318
7319 if (pointer->deref) {
7320 fprintf(f, "\n NIR: ");
7321 nir_print_instr(&pointer->deref->instr, f);
7322 }
7323 break;
7324 }
7325
7326 case vtn_value_type_type: {
7327 struct vtn_type *type = val->type;
7328 fprintf(f, " %s", vtn_base_type_to_string(type->base_type));
7329 switch (type->base_type) {
7330 case vtn_base_type_pointer:
7331 fprintf(f, " deref=%d", vtn_id_for_type(b, type->pointed));
7332 fprintf(f, " %s", spirv_storageclass_to_string(val->type->storage_class));
7333 break;
7334 default:
7335 break;
7336 }
7337 if (type->type)
7338 fprintf(f, " glsl_type=%s", glsl_get_type_name(type->type));
7339 break;
7340 }
7341
7342 default:
7343 break;
7344 }
7345 fprintf(f, "\n");
7346 }
7347
7348 void
7349 vtn_dump_values(struct vtn_builder *b, FILE *f)
7350 {
7351 fprintf(f, "=== SPIR-V values\n");
7352 for (unsigned i = 1; i < b->value_id_bound; i++) {
7353 struct vtn_value *val = &b->values[i];
7354 fprintf(f, "%8d = ", i);
7355 vtn_print_value(b, val, f);
7356 }
7357 fprintf(f, "===\n");
7358 }
7359