1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott ([email protected])
25 *
26 */
27
28 #include "nir.h"
29 #include <assert.h>
30 #include <limits.h>
31 #include <math.h>
32 #include "util/half_float.h"
33 #include "util/macros.h"
34 #include "util/u_math.h"
35 #include "util/u_qsort.h"
36 #include "nir_builder.h"
37 #include "nir_control_flow_private.h"
38 #include "nir_worklist.h"
39
40 #ifndef NDEBUG
41 uint32_t nir_debug = 0;
42 bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };
43
44 static const struct debug_named_value nir_debug_control[] = {
45 { "clone", NIR_DEBUG_CLONE,
46 "Test cloning a shader at each successful lowering/optimization call" },
47 { "serialize", NIR_DEBUG_SERIALIZE,
48 "Test serialize and deserialize shader at each successful lowering/optimization call" },
49 { "novalidate", NIR_DEBUG_NOVALIDATE,
50 "Disable shader validation at each successful lowering/optimization call" },
51 { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
52 "Validate SSA dominance in shader at each successful lowering/optimization call" },
53 { "tgsi", NIR_DEBUG_TGSI,
54 "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
55 { "print", NIR_DEBUG_PRINT,
56 "Dump resulting shader after each successful lowering/optimization call" },
57 { "print_vs", NIR_DEBUG_PRINT_VS,
58 "Dump resulting vertex shader after each successful lowering/optimization call" },
59 { "print_tcs", NIR_DEBUG_PRINT_TCS,
60 "Dump resulting tessellation control shader after each successful lowering/optimization call" },
61 { "print_tes", NIR_DEBUG_PRINT_TES,
62 "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
63 { "print_gs", NIR_DEBUG_PRINT_GS,
64 "Dump resulting geometry shader after each successful lowering/optimization call" },
65 { "print_fs", NIR_DEBUG_PRINT_FS,
66 "Dump resulting fragment shader after each successful lowering/optimization call" },
67 { "print_cs", NIR_DEBUG_PRINT_CS,
68 "Dump resulting compute shader after each successful lowering/optimization call" },
69 { "print_ts", NIR_DEBUG_PRINT_TS,
70 "Dump resulting task shader after each successful lowering/optimization call" },
71 { "print_ms", NIR_DEBUG_PRINT_MS,
72 "Dump resulting mesh shader after each successful lowering/optimization call" },
73 { "print_rgs", NIR_DEBUG_PRINT_RGS,
74 "Dump resulting raygen shader after each successful lowering/optimization call" },
75 { "print_ahs", NIR_DEBUG_PRINT_AHS,
76 "Dump resulting any-hit shader after each successful lowering/optimization call" },
77 { "print_chs", NIR_DEBUG_PRINT_CHS,
78 "Dump resulting closest-hit shader after each successful lowering/optimization call" },
79 { "print_mhs", NIR_DEBUG_PRINT_MHS,
80 "Dump resulting miss-hit shader after each successful lowering/optimization call" },
81 { "print_is", NIR_DEBUG_PRINT_IS,
82 "Dump resulting intersection shader after each successful lowering/optimization call" },
83 { "print_cbs", NIR_DEBUG_PRINT_CBS,
84 "Dump resulting callable shader after each successful lowering/optimization call" },
85 { "print_ks", NIR_DEBUG_PRINT_KS,
86 "Dump resulting kernel shader after each successful lowering/optimization call" },
87 { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
88 "Do not print const value near each use of const SSA variable" },
89 { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
90 "Print shaders even if they are marked as internal" },
91 { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
92 "Print pass_flags for every instruction when pass_flags are non-zero" },
93 DEBUG_NAMED_VALUE_END
94 };
95
96 DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)
97
98 static void
99 nir_process_debug_variable_once(void)
100 {
101 nir_debug = debug_get_option_nir_debug();
102
103 /* clang-format off */
104 nir_debug_print_shader[MESA_SHADER_VERTEX] = NIR_DEBUG(PRINT_VS);
105 nir_debug_print_shader[MESA_SHADER_TESS_CTRL] = NIR_DEBUG(PRINT_TCS);
106 nir_debug_print_shader[MESA_SHADER_TESS_EVAL] = NIR_DEBUG(PRINT_TES);
107 nir_debug_print_shader[MESA_SHADER_GEOMETRY] = NIR_DEBUG(PRINT_GS);
108 nir_debug_print_shader[MESA_SHADER_FRAGMENT] = NIR_DEBUG(PRINT_FS);
109 nir_debug_print_shader[MESA_SHADER_COMPUTE] = NIR_DEBUG(PRINT_CS);
110 nir_debug_print_shader[MESA_SHADER_TASK] = NIR_DEBUG(PRINT_TS);
111 nir_debug_print_shader[MESA_SHADER_MESH] = NIR_DEBUG(PRINT_MS);
112 nir_debug_print_shader[MESA_SHADER_RAYGEN] = NIR_DEBUG(PRINT_RGS);
113 nir_debug_print_shader[MESA_SHADER_ANY_HIT] = NIR_DEBUG(PRINT_AHS);
114 nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT] = NIR_DEBUG(PRINT_CHS);
115 nir_debug_print_shader[MESA_SHADER_MISS] = NIR_DEBUG(PRINT_MHS);
116 nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
117 nir_debug_print_shader[MESA_SHADER_CALLABLE] = NIR_DEBUG(PRINT_CBS);
118 nir_debug_print_shader[MESA_SHADER_KERNEL] = NIR_DEBUG(PRINT_KS);
119 /* clang-format on */
120 }
121
122 void
123 nir_process_debug_variable(void)
124 {
125 static once_flag flag = ONCE_FLAG_INIT;
126 call_once(&flag, nir_process_debug_variable_once);
127 }
128 #endif
129
130 /** Return true if the component mask "mask" with bit size "old_bit_size" can
131 * be re-interpreted to be used with "new_bit_size".
132 */
133 bool
134 nir_component_mask_can_reinterpret(nir_component_mask_t mask,
135 unsigned old_bit_size,
136 unsigned new_bit_size)
137 {
138 assert(util_is_power_of_two_nonzero(old_bit_size));
139 assert(util_is_power_of_two_nonzero(new_bit_size));
140
141 if (old_bit_size == new_bit_size)
142 return true;
143
144 if (old_bit_size == 1 || new_bit_size == 1)
145 return false;
146
147 if (old_bit_size > new_bit_size) {
148 unsigned ratio = old_bit_size / new_bit_size;
149 return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
150 }
151
152 unsigned iter = mask;
153 while (iter) {
154 int start, count;
155 u_bit_scan_consecutive_range(&iter, &start, &count);
156 start *= old_bit_size;
157 count *= old_bit_size;
158 if (start % new_bit_size != 0)
159 return false;
160 if (count % new_bit_size != 0)
161 return false;
162 }
163 return true;
164 }
165
166 /** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
167  * it can be used with "new_bit_size".
168 */
169 nir_component_mask_t
170 nir_component_mask_reinterpret(nir_component_mask_t mask,
171 unsigned old_bit_size,
172 unsigned new_bit_size)
173 {
174 assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));
175
176 if (old_bit_size == new_bit_size)
177 return mask;
178
179 nir_component_mask_t new_mask = 0;
180 unsigned iter = mask;
181 while (iter) {
182 int start, count;
183 u_bit_scan_consecutive_range(&iter, &start, &count);
184 start = start * old_bit_size / new_bit_size;
185 count = count * old_bit_size / new_bit_size;
186 new_mask |= BITFIELD_RANGE(start, count);
187 }
188 return new_mask;
189 }
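/* Illustrative example (not part of the original file): a two-component
 * write mask of 32-bit values covers the same bytes as four 16-bit
 * components, so
 *
 *    assert(nir_component_mask_can_reinterpret(0x3, 32, 16));
 *    nir_component_mask_t m = nir_component_mask_reinterpret(0x3, 32, 16);
 *    // m == 0xf
 */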
190
191 nir_shader *
192 nir_shader_create(void *mem_ctx,
193 gl_shader_stage stage,
194 const nir_shader_compiler_options *options,
195 shader_info *si)
196 {
197 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
198
199 shader->gctx = gc_context(shader);
200
201 #ifndef NDEBUG
202 nir_process_debug_variable();
203 #endif
204
205 exec_list_make_empty(&shader->variables);
206
207 shader->options = options;
208
209 if (si) {
210 assert(si->stage == stage);
211 shader->info = *si;
212 } else {
213 shader->info.stage = stage;
214 }
215
216 exec_list_make_empty(&shader->functions);
217
218 shader->num_inputs = 0;
219 shader->num_outputs = 0;
220 shader->num_uniforms = 0;
221
222 return shader;
223 }
224
225 void
226 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
227 {
228 switch (var->data.mode) {
229 case nir_var_function_temp:
230 assert(!"nir_shader_add_variable cannot be used for local variables");
231 return;
232
233 case nir_var_shader_temp:
234 case nir_var_shader_in:
235 case nir_var_shader_out:
236 case nir_var_uniform:
237 case nir_var_mem_ubo:
238 case nir_var_mem_ssbo:
239 case nir_var_image:
240 case nir_var_mem_shared:
241 case nir_var_system_value:
242 case nir_var_mem_push_const:
243 case nir_var_mem_constant:
244 case nir_var_shader_call_data:
245 case nir_var_ray_hit_attrib:
246 case nir_var_mem_task_payload:
247 case nir_var_mem_node_payload:
248 case nir_var_mem_node_payload_in:
249 case nir_var_mem_global:
250 break;
251
252 default:
253 assert(!"invalid mode");
254 return;
255 }
256
257 exec_list_push_tail(&shader->variables, &var->node);
258 }
259
260 nir_variable *
261 nir_variable_create(nir_shader *shader, nir_variable_mode mode,
262 const struct glsl_type *type, const char *name)
263 {
264 nir_variable *var = rzalloc(shader, nir_variable);
265 var->name = ralloc_strdup(var, name);
266 var->type = type;
267 var->data.mode = mode;
268 var->data.how_declared = nir_var_declared_normally;
269
270 if ((mode == nir_var_shader_in &&
271 shader->info.stage != MESA_SHADER_VERTEX &&
272 shader->info.stage != MESA_SHADER_KERNEL) ||
273 (mode == nir_var_shader_out &&
274 shader->info.stage != MESA_SHADER_FRAGMENT))
275 var->data.interpolation = INTERP_MODE_SMOOTH;
276
277 if (mode == nir_var_shader_in || mode == nir_var_uniform)
278 var->data.read_only = true;
279
280 nir_shader_add_variable(shader, var);
281
282 return var;
283 }
284
285 nir_variable *
286 nir_local_variable_create(nir_function_impl *impl,
287 const struct glsl_type *type, const char *name)
288 {
289 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
290 var->name = ralloc_strdup(var, name);
291 var->type = type;
292 var->data.mode = nir_var_function_temp;
293
294 nir_function_impl_add_variable(impl, var);
295
296 return var;
297 }
298
299 nir_variable *
300 nir_state_variable_create(nir_shader *shader,
301 const struct glsl_type *type,
302 const char *name,
303 const gl_state_index16 tokens[STATE_LENGTH])
304 {
305 nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
306 var->num_state_slots = 1;
307 var->state_slots = rzalloc_array(var, nir_state_slot, 1);
308 memcpy(var->state_slots[0].tokens, tokens,
309 sizeof(var->state_slots[0].tokens));
310 shader->num_uniforms++;
311 return var;
312 }
313
314 nir_variable *
315 nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
316 const struct glsl_type *type)
317 {
318    /* Only support non-array or arrayed-I/O types, because otherwise we don't
319     * know how much to increment num_inputs/outputs.
320 */
321 assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));
322
323 const char *name;
324 switch (mode) {
325 case nir_var_shader_in:
326 if (shader->info.stage == MESA_SHADER_VERTEX)
327 name = gl_vert_attrib_name(location);
328 else
329 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
330 break;
331
332 case nir_var_shader_out:
333 if (shader->info.stage == MESA_SHADER_FRAGMENT)
334 name = gl_frag_result_name(location);
335 else
336 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
337 break;
338
339 case nir_var_system_value:
340 name = gl_system_value_name(location);
341 break;
342
343 default:
344 unreachable("Unsupported variable mode");
345 }
346
347 nir_variable *var = nir_variable_create(shader, mode, type, name);
348 var->data.location = location;
349
350 switch (mode) {
351 case nir_var_shader_in:
352 var->data.driver_location = shader->num_inputs++;
353 break;
354
355 case nir_var_shader_out:
356 var->data.driver_location = shader->num_outputs++;
357 break;
358
359 case nir_var_system_value:
360 break;
361
362 default:
363 unreachable("Unsupported variable mode");
364 }
365
366 return var;
367 }
368
369 nir_variable *
370 nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
371 const struct glsl_type *type)
372 {
373 nir_variable *var = nir_find_variable_with_location(shader, mode, location);
374 if (var) {
375 /* If this shader has location_fracs, this builder function is not suitable. */
376 assert(var->data.location_frac == 0);
377
378 /* The variable for the slot should match what we expected. */
379 assert(type == var->type);
380 return var;
381 }
382
383 return nir_create_variable_with_location(shader, mode, location, type);
384 }
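/* Illustrative use (a sketch; "shader" and the chosen location are assumed):
 * a lowering pass that needs the depth output of a fragment shader could do
 *
 *    nir_variable *depth =
 *       nir_get_variable_with_location(shader, nir_var_shader_out,
 *                                      FRAG_RESULT_DEPTH, glsl_float_type());
 *
 * which reuses an existing variable for that slot or creates a new one.
 */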
385
386 nir_variable *
387 nir_find_variable_with_location(nir_shader *shader,
388 nir_variable_mode mode,
389 unsigned location)
390 {
391 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
392 nir_foreach_variable_with_modes(var, shader, mode) {
393 if (var->data.location == location)
394 return var;
395 }
396 return NULL;
397 }
398
399 nir_variable *
400 nir_find_variable_with_driver_location(nir_shader *shader,
401 nir_variable_mode mode,
402 unsigned location)
403 {
404 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
405 nir_foreach_variable_with_modes(var, shader, mode) {
406 if (var->data.driver_location == location)
407 return var;
408 }
409 return NULL;
410 }
411
412 nir_variable *
413 nir_find_state_variable(nir_shader *s,
414 gl_state_index16 tokens[STATE_LENGTH])
415 {
416 nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
417 if (var->num_state_slots == 1 &&
418 !memcmp(var->state_slots[0].tokens, tokens,
419 sizeof(var->state_slots[0].tokens)))
420 return var;
421 }
422 return NULL;
423 }
424
425 nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
426 unsigned texture_index)
427 {
428 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
429 unsigned size =
430 glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
431 if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
432 glsl_type_is_sampler(glsl_without_array(var->type))) &&
433 (var->data.binding == texture_index ||
434 (var->data.binding < texture_index &&
435 var->data.binding + size > texture_index)))
436 return var;
437 }
438 return NULL;
439 }
440
441 /* Annoyingly, qsort_r is not in the C standard library and, in particular, we
442  * can't count on it on MSVC and Android. So we stuff the CMP function into
443 * each array element. It's a bit messy and burns more memory but the list of
444  * variables should never be all that long.
445 */
446 struct var_cmp {
447 nir_variable *var;
448 int (*cmp)(const nir_variable *, const nir_variable *);
449 };
450
451 static int
452 var_sort_cmp(const void *_a, const void *_b, void *_cmp)
453 {
454 const struct var_cmp *a = _a;
455 const struct var_cmp *b = _b;
456 assert(a->cmp == b->cmp);
457 return a->cmp(a->var, b->var);
458 }
459
460 void
461 nir_sort_variables_with_modes(nir_shader *shader,
462 int (*cmp)(const nir_variable *,
463 const nir_variable *),
464 nir_variable_mode modes)
465 {
466 unsigned num_vars = 0;
467 nir_foreach_variable_with_modes(var, shader, modes) {
468 ++num_vars;
469 }
470 struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
471 unsigned i = 0;
472 nir_foreach_variable_with_modes_safe(var, shader, modes) {
473 exec_node_remove(&var->node);
474 vars[i++] = (struct var_cmp){
475 .var = var,
476 .cmp = cmp,
477 };
478 }
479 assert(i == num_vars);
480
481 util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);
482
483 for (i = 0; i < num_vars; i++)
484 exec_list_push_tail(&shader->variables, &vars[i].var->node);
485
486 ralloc_free(vars);
487 }
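/* Illustrative use (a sketch; "shader" is assumed): sorting all inputs by
 * location with a caller-provided comparison function, e.g.
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_by_location, nir_var_shader_in);
 */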
488
489 nir_function *
490 nir_function_create(nir_shader *shader, const char *name)
491 {
492 nir_function *func = ralloc(shader, nir_function);
493
494 exec_list_push_tail(&shader->functions, &func->node);
495
496 func->name = ralloc_strdup(func, name);
497 func->shader = shader;
498 func->num_params = 0;
499 func->params = NULL;
500 func->impl = NULL;
501 func->is_entrypoint = false;
502 func->is_preamble = false;
503 func->dont_inline = false;
504 func->should_inline = false;
505 func->is_subroutine = false;
506 func->subroutine_index = 0;
507 func->num_subroutine_types = 0;
508 func->subroutine_types = NULL;
509
510 /* Only meaningful for shader libraries, so don't export by default. */
511 func->is_exported = false;
512
513 return func;
514 }
515
516 void
517 nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
518 {
519 dest->src = nir_src_for_ssa(src->src.ssa);
520 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
521 dest->swizzle[i] = src->swizzle[i];
522 }
523
524 bool
525 nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
526 {
527 static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
528 STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);
529
530 const nir_alu_src *src = &alu->src[srcn];
531 unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);
532
533 return (src->src.ssa->num_components == num_components) &&
534 (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
535 }
536
537 static void
538 cf_init(nir_cf_node *node, nir_cf_node_type type)
539 {
540 exec_node_init(&node->node);
541 node->parent = NULL;
542 node->type = type;
543 }
544
545 nir_function_impl *
546 nir_function_impl_create_bare(nir_shader *shader)
547 {
548 nir_function_impl *impl = ralloc(shader, nir_function_impl);
549
550 impl->function = NULL;
551 impl->preamble = NULL;
552
553 cf_init(&impl->cf_node, nir_cf_node_function);
554
555 exec_list_make_empty(&impl->body);
556 exec_list_make_empty(&impl->locals);
557 impl->ssa_alloc = 0;
558 impl->num_blocks = 0;
559 impl->valid_metadata = nir_metadata_none;
560 impl->structured = true;
561
562 /* create start & end blocks */
563 nir_block *start_block = nir_block_create(shader);
564 nir_block *end_block = nir_block_create(shader);
565 start_block->cf_node.parent = &impl->cf_node;
566 end_block->cf_node.parent = &impl->cf_node;
567 impl->end_block = end_block;
568
569 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
570
571 start_block->successors[0] = end_block;
572 _mesa_set_add(end_block->predecessors, start_block);
573 return impl;
574 }
575
576 nir_function_impl *
577 nir_function_impl_create(nir_function *function)
578 {
579 assert(function->impl == NULL);
580
581 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
582 nir_function_set_impl(function, impl);
583 return impl;
584 }
585
586 nir_block *
587 nir_block_create(nir_shader *shader)
588 {
589 nir_block *block = rzalloc(shader, nir_block);
590
591 cf_init(&block->cf_node, nir_cf_node_block);
592
593 block->successors[0] = block->successors[1] = NULL;
594 block->predecessors = _mesa_pointer_set_create(block);
595 block->imm_dom = NULL;
596 /* XXX maybe it would be worth it to defer allocation? This
597 * way it doesn't get allocated for shader refs that never run
598 * nir_calc_dominance? For example, state-tracker creates an
599 * initial IR, clones that, runs appropriate lowering pass, passes
600 * to driver which does common lowering/opt, and then stores ref
601  * which is later used to do state-specific lowering and further
602 * opt. Do any of the references not need dominance metadata?
603 */
604 block->dom_frontier = _mesa_pointer_set_create(block);
605
606 exec_list_make_empty(&block->instr_list);
607
608 return block;
609 }
610
611 static inline void
612 src_init(nir_src *src)
613 {
614 src->ssa = NULL;
615 }
616
617 nir_if *
618 nir_if_create(nir_shader *shader)
619 {
620 nir_if *if_stmt = ralloc(shader, nir_if);
621
622 if_stmt->control = nir_selection_control_none;
623
624 cf_init(&if_stmt->cf_node, nir_cf_node_if);
625 src_init(&if_stmt->condition);
626
627 nir_block *then = nir_block_create(shader);
628 exec_list_make_empty(&if_stmt->then_list);
629 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
630 then->cf_node.parent = &if_stmt->cf_node;
631
632 nir_block *else_stmt = nir_block_create(shader);
633 exec_list_make_empty(&if_stmt->else_list);
634 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
635 else_stmt->cf_node.parent = &if_stmt->cf_node;
636
637 return if_stmt;
638 }
639
640 nir_loop *
641 nir_loop_create(nir_shader *shader)
642 {
643 nir_loop *loop = rzalloc(shader, nir_loop);
644
645 cf_init(&loop->cf_node, nir_cf_node_loop);
646 /* Assume that loops are divergent until proven otherwise */
647 loop->divergent = true;
648
649 nir_block *body = nir_block_create(shader);
650 exec_list_make_empty(&loop->body);
651 exec_list_push_tail(&loop->body, &body->cf_node.node);
652 body->cf_node.parent = &loop->cf_node;
653
654 body->successors[0] = body;
655 _mesa_set_add(body->predecessors, body);
656
657 exec_list_make_empty(&loop->continue_list);
658
659 return loop;
660 }
661
662 static void
663 instr_init(nir_instr *instr, nir_instr_type type)
664 {
665 instr->type = type;
666 instr->block = NULL;
667 exec_node_init(&instr->node);
668 }
669
670 static void
671 alu_src_init(nir_alu_src *src)
672 {
673 src_init(&src->src);
674 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
675 src->swizzle[i] = i;
676 }
677
678 nir_alu_instr *
679 nir_alu_instr_create(nir_shader *shader, nir_op op)
680 {
681 unsigned num_srcs = nir_op_infos[op].num_inputs;
682 nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);
683
684 instr_init(&instr->instr, nir_instr_type_alu);
685 instr->op = op;
686 for (unsigned i = 0; i < num_srcs; i++)
687 alu_src_init(&instr->src[i]);
688
689 return instr;
690 }
691
692 nir_deref_instr *
693 nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
694 {
695 nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);
696
697 instr_init(&instr->instr, nir_instr_type_deref);
698
699 instr->deref_type = deref_type;
700 if (deref_type != nir_deref_type_var)
701 src_init(&instr->parent);
702
703 if (deref_type == nir_deref_type_array ||
704 deref_type == nir_deref_type_ptr_as_array)
705 src_init(&instr->arr.index);
706
707 return instr;
708 }
709
710 nir_jump_instr *
711 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
712 {
713 nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
714 instr_init(&instr->instr, nir_instr_type_jump);
715 src_init(&instr->condition);
716 instr->type = type;
717 instr->target = NULL;
718 instr->else_target = NULL;
719
720 return instr;
721 }
722
723 nir_load_const_instr *
724 nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
725 unsigned bit_size)
726 {
727 nir_load_const_instr *instr =
728 gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
729 instr_init(&instr->instr, nir_instr_type_load_const);
730
731 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
732
733 return instr;
734 }
735
736 nir_intrinsic_instr *
737 nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
738 {
739 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
740 nir_intrinsic_instr *instr =
741 gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);
742
743 instr_init(&instr->instr, nir_instr_type_intrinsic);
744 instr->intrinsic = op;
745
746 for (unsigned i = 0; i < num_srcs; i++)
747 src_init(&instr->src[i]);
748
749 return instr;
750 }
751
752 nir_call_instr *
753 nir_call_instr_create(nir_shader *shader, nir_function *callee)
754 {
755 const unsigned num_params = callee->num_params;
756 nir_call_instr *instr =
757 gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);
758
759 instr_init(&instr->instr, nir_instr_type_call);
760 instr->callee = callee;
761 instr->num_params = num_params;
762 for (unsigned i = 0; i < num_params; i++)
763 src_init(&instr->params[i]);
764
765 return instr;
766 }
767
768 static int8_t default_tg4_offsets[4][2] = {
769 { 0, 1 },
770 { 1, 1 },
771 { 1, 0 },
772 { 0, 0 },
773 };
774
775 nir_tex_instr *
776 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
777 {
778 nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
779 instr_init(&instr->instr, nir_instr_type_tex);
780
781 instr->num_srcs = num_srcs;
782 instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
783 for (unsigned i = 0; i < num_srcs; i++)
784 src_init(&instr->src[i].src);
785
786 instr->texture_index = 0;
787 instr->sampler_index = 0;
788 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
789
790 return instr;
791 }
792
793 void
794 nir_tex_instr_add_src(nir_tex_instr *tex,
795 nir_tex_src_type src_type,
796 nir_def *src)
797 {
798 nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);
799
800 for (unsigned i = 0; i < tex->num_srcs; i++) {
801 new_srcs[i].src_type = tex->src[i].src_type;
802 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
803 &tex->src[i].src);
804 }
805
806 gc_free(tex->src);
807 tex->src = new_srcs;
808
809 tex->src[tex->num_srcs].src_type = src_type;
810 nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
811 tex->num_srcs++;
812 }
813
814 void
815 nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
816 {
817 assert(src_idx < tex->num_srcs);
818
819 /* First rewrite the source to NIR_SRC_INIT */
820 nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);
821
822 /* Now, move all of the other sources down */
823 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
824 tex->src[i - 1].src_type = tex->src[i].src_type;
825 nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
826 }
827 tex->num_srcs--;
828 }
829
830 bool
831 nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
832 {
833 if (tex->op != nir_texop_tg4)
834 return false;
835 return memcmp(tex->tg4_offsets, default_tg4_offsets,
836 sizeof(tex->tg4_offsets)) != 0;
837 }
838
839 nir_phi_instr *
840 nir_phi_instr_create(nir_shader *shader)
841 {
842 nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
843 instr_init(&instr->instr, nir_instr_type_phi);
844
845 exec_list_make_empty(&instr->srcs);
846
847 return instr;
848 }
849
850 /**
851 * Adds a new source to a NIR instruction.
852 *
853 * Note that this does not update the def/use relationship for src, assuming
854 * that the instr is not in the shader. If it is, you have to do:
855 *
856  *    list_addtail(&phi_src->src.use_link, &src->uses);
857 */
858 nir_phi_src *
859 nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
860 {
861 nir_phi_src *phi_src;
862
863 phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
864 phi_src->pred = pred;
865 phi_src->src = nir_src_for_ssa(src);
866 nir_src_set_parent_instr(&phi_src->src, &instr->instr);
867 exec_list_push_tail(&instr->srcs, &phi_src->node);
868
869 return phi_src;
870 }
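/* Illustrative use (a sketch; "phi", "pred_block" and "value" are assumed):
 * when the phi is already in the shader, the caller also has to add the use
 * link by hand as described above:
 *
 *    nir_phi_src *phi_src = nir_phi_instr_add_src(phi, pred_block, value);
 *    list_addtail(&phi_src->src.use_link, &value->uses);
 */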
871
872 nir_parallel_copy_instr *
873 nir_parallel_copy_instr_create(nir_shader *shader)
874 {
875 nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
876 instr_init(&instr->instr, nir_instr_type_parallel_copy);
877
878 exec_list_make_empty(&instr->entries);
879
880 return instr;
881 }
882
883 nir_debug_info_instr *
884 nir_debug_info_instr_create(nir_shader *shader, nir_debug_info_type type,
885 uint32_t string_length)
886 {
887 uint32_t additional_size = 0;
888 if (type == nir_debug_info_string)
889 additional_size = string_length + 1;
890
891 nir_debug_info_instr *instr = gc_zalloc_size(
892 shader->gctx, sizeof(nir_debug_info_instr) + additional_size, 1);
893 instr_init(&instr->instr, nir_instr_type_debug_info);
894
895 instr->type = type;
896
897 if (type == nir_debug_info_string)
898 instr->string_length = string_length;
899
900 return instr;
901 }
902
903 nir_undef_instr *
904 nir_undef_instr_create(nir_shader *shader,
905 unsigned num_components,
906 unsigned bit_size)
907 {
908 nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
909 instr_init(&instr->instr, nir_instr_type_undef);
910
911 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
912
913 return instr;
914 }
915
916 static nir_const_value
917 const_value_float(double d, unsigned bit_size)
918 {
919 nir_const_value v;
920 memset(&v, 0, sizeof(v));
921
922 /* clang-format off */
923 switch (bit_size) {
924 case 16: v.u16 = _mesa_float_to_half(d); break;
925 case 32: v.f32 = d; break;
926 case 64: v.f64 = d; break;
927 default:
928 unreachable("Invalid bit size");
929 }
930 /* clang-format on */
931
932 return v;
933 }
934
935 static nir_const_value
936 const_value_int(int64_t i, unsigned bit_size)
937 {
938 nir_const_value v;
939 memset(&v, 0, sizeof(v));
940
941 /* clang-format off */
942 switch (bit_size) {
943 case 1: v.b = i & 1; break;
944 case 8: v.i8 = i; break;
945 case 16: v.i16 = i; break;
946 case 32: v.i32 = i; break;
947 case 64: v.i64 = i; break;
948 default:
949 unreachable("Invalid bit size");
950 }
951 /* clang-format on */
952
953 return v;
954 }
955
956 nir_const_value
957 nir_alu_binop_identity(nir_op binop, unsigned bit_size)
958 {
959 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
960 const int64_t min_int = -max_int - 1;
961 switch (binop) {
962 case nir_op_iadd:
963 return const_value_int(0, bit_size);
964 case nir_op_fadd:
965 return const_value_float(0, bit_size);
966 case nir_op_imul:
967 return const_value_int(1, bit_size);
968 case nir_op_fmul:
969 return const_value_float(1, bit_size);
970 case nir_op_imin:
971 return const_value_int(max_int, bit_size);
972 case nir_op_umin:
973 return const_value_int(~0ull, bit_size);
974 case nir_op_fmin:
975 return const_value_float(INFINITY, bit_size);
976 case nir_op_imax:
977 return const_value_int(min_int, bit_size);
978 case nir_op_umax:
979 return const_value_int(0, bit_size);
980 case nir_op_fmax:
981 return const_value_float(-INFINITY, bit_size);
982 case nir_op_iand:
983 return const_value_int(~0ull, bit_size);
984 case nir_op_ior:
985 return const_value_int(0, bit_size);
986 case nir_op_ixor:
987 return const_value_int(0, bit_size);
988 default:
989 unreachable("Invalid reduction operation");
990 }
991 }
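/* For example (illustrative only), a 32-bit fmin reduction starts from
 * +INFINITY and an iand reduction starts from an all-ones value:
 *
 *    nir_const_value id = nir_alu_binop_identity(nir_op_fmin, 32);
 *    // id.f32 == INFINITY
 */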
992
993 nir_function_impl *
994 nir_cf_node_get_function(nir_cf_node *node)
995 {
996 while (node->type != nir_cf_node_function) {
997 node = node->parent;
998 }
999
1000 return nir_cf_node_as_function(node);
1001 }
1002
1003 /* Reduces a cursor by trying to convert everything to an "after" cursor and
1004  * trying to go up to block granularity when possible.
1005 */
1006 static nir_cursor
1007 reduce_cursor(nir_cursor cursor)
1008 {
1009 switch (cursor.option) {
1010 case nir_cursor_before_block:
1011 if (exec_list_is_empty(&cursor.block->instr_list)) {
1012 /* Empty block. After is as good as before. */
1013 cursor.option = nir_cursor_after_block;
1014 }
1015 return cursor;
1016
1017 case nir_cursor_after_block:
1018 return cursor;
1019
1020 case nir_cursor_before_instr: {
1021 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
1022 if (prev_instr) {
1023 /* Before this instruction is after the previous */
1024 cursor.instr = prev_instr;
1025 cursor.option = nir_cursor_after_instr;
1026 } else {
1027 /* No previous instruction. Switch to before block */
1028 cursor.block = cursor.instr->block;
1029 cursor.option = nir_cursor_before_block;
1030 }
1031 return reduce_cursor(cursor);
1032 }
1033
1034 case nir_cursor_after_instr:
1035 if (nir_instr_next(cursor.instr) == NULL) {
1036 /* This is the last instruction, switch to after block */
1037 cursor.option = nir_cursor_after_block;
1038 cursor.block = cursor.instr->block;
1039 }
1040 return cursor;
1041
1042 default:
1043 unreachable("Inavlid cursor option");
1044 }
1045 }
1046
1047 bool
1048 nir_cursors_equal(nir_cursor a, nir_cursor b)
1049 {
1050 /* Reduced cursors should be unique */
1051 a = reduce_cursor(a);
1052 b = reduce_cursor(b);
1053
1054 return a.block == b.block && a.option == b.option;
1055 }
1056
1057 static bool
1058 add_use_cb(nir_src *src, void *state)
1059 {
1060 nir_instr *instr = state;
1061
1062 nir_src_set_parent_instr(src, instr);
1063 list_addtail(&src->use_link, &src->ssa->uses);
1064
1065 return true;
1066 }
1067
1068 static bool
1069 add_ssa_def_cb(nir_def *def, void *state)
1070 {
1071 nir_instr *instr = state;
1072
1073 if (instr->block && def->index == UINT_MAX) {
1074 nir_function_impl *impl =
1075 nir_cf_node_get_function(&instr->block->cf_node);
1076
1077 def->index = impl->ssa_alloc++;
1078
1079 impl->valid_metadata &= ~nir_metadata_live_defs;
1080 }
1081
1082 return true;
1083 }
1084
1085 static void
1086 add_defs_uses(nir_instr *instr)
1087 {
1088 nir_foreach_src(instr, add_use_cb, instr);
1089 nir_foreach_def(instr, add_ssa_def_cb, instr);
1090 }
1091
1092 void
1093 nir_instr_insert(nir_cursor cursor, nir_instr *instr)
1094 {
1095 switch (cursor.option) {
1096 case nir_cursor_before_block:
1097 /* Only allow inserting jumps into empty blocks. */
1098 if (instr->type == nir_instr_type_jump)
1099 assert(exec_list_is_empty(&cursor.block->instr_list));
1100
1101 instr->block = cursor.block;
1102 add_defs_uses(instr);
1103 exec_list_push_head(&cursor.block->instr_list, &instr->node);
1104 break;
1105 case nir_cursor_after_block: {
1106 /* Inserting instructions after a jump is illegal. */
1107 nir_instr *last = nir_block_last_instr(cursor.block);
1108 assert(last == NULL || last->type != nir_instr_type_jump);
1109 (void)last;
1110
1111 instr->block = cursor.block;
1112 add_defs_uses(instr);
1113 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
1114 break;
1115 }
1116 case nir_cursor_before_instr:
1117 assert(instr->type != nir_instr_type_jump);
1118 instr->block = cursor.instr->block;
1119 add_defs_uses(instr);
1120 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
1121 break;
1122 case nir_cursor_after_instr:
1123 /* Inserting instructions after a jump is illegal. */
1124 assert(cursor.instr->type != nir_instr_type_jump);
1125
1126 /* Only allow inserting jumps at the end of the block. */
1127 if (instr->type == nir_instr_type_jump)
1128 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
1129
1130 instr->block = cursor.instr->block;
1131 add_defs_uses(instr);
1132 exec_node_insert_after(&cursor.instr->node, &instr->node);
1133 break;
1134 }
1135
1136 if (instr->type == nir_instr_type_jump)
1137 nir_handle_add_jump(instr->block);
1138
1139 nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
1140 impl->valid_metadata &= ~nir_metadata_instr_index;
1141 }
1142
1143 bool
1144 nir_instr_move(nir_cursor cursor, nir_instr *instr)
1145 {
1146 /* If the cursor happens to refer to this instruction (either before or
1147 * after), don't do anything.
1148 */
1149 if ((cursor.option == nir_cursor_before_instr ||
1150 cursor.option == nir_cursor_after_instr) &&
1151 cursor.instr == instr)
1152 return false;
1153
1154 nir_instr_remove(instr);
1155 nir_instr_insert(cursor, instr);
1156 return true;
1157 }
1158
1159 static bool
1160 src_is_valid(const nir_src *src)
1161 {
1162 return (src->ssa != NULL);
1163 }
1164
1165 static bool
1166 remove_use_cb(nir_src *src, void *state)
1167 {
1168 (void)state;
1169
1170 if (src_is_valid(src))
1171 list_del(&src->use_link);
1172
1173 return true;
1174 }
1175
1176 static void
1177 remove_defs_uses(nir_instr *instr)
1178 {
1179 nir_foreach_src(instr, remove_use_cb, instr);
1180 }
1181
1182 void
1183 nir_instr_remove_v(nir_instr *instr)
1184 {
1185 remove_defs_uses(instr);
1186 exec_node_remove(&instr->node);
1187
1188 if (instr->type == nir_instr_type_jump) {
1189 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
1190 nir_handle_remove_jump(instr->block, jump_instr->type);
1191 }
1192 }
1193
1194 void
1195 nir_instr_free(nir_instr *instr)
1196 {
1197 switch (instr->type) {
1198 case nir_instr_type_tex:
1199 gc_free(nir_instr_as_tex(instr)->src);
1200 break;
1201
1202 case nir_instr_type_phi: {
1203 nir_phi_instr *phi = nir_instr_as_phi(instr);
1204 nir_foreach_phi_src_safe(phi_src, phi)
1205 gc_free(phi_src);
1206 break;
1207 }
1208
1209 default:
1210 break;
1211 }
1212
1213 gc_free(instr);
1214 }
1215
1216 void
1217 nir_instr_free_list(struct exec_list *list)
1218 {
1219 struct exec_node *node;
1220 while ((node = exec_list_pop_head(list))) {
1221 nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
1222 nir_instr_free(removed_instr);
1223 }
1224 }
1225
1226 static bool
1227 nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
1228 {
1229 bool *live = state;
1230
1231 if (!nir_def_is_unused(def)) {
1232 *live = true;
1233 return false;
1234 } else {
1235 return true;
1236 }
1237 }
1238
1239 static bool
1240 nir_instr_free_and_dce_is_live(nir_instr *instr)
1241 {
1242 /* Note: don't have to worry about jumps because they don't have dests to
1243 * become unused.
1244 */
1245 if (instr->type == nir_instr_type_intrinsic) {
1246 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1247 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1248 if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
1249 return true;
1250 }
1251
1252 bool live = false;
1253 nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
1254 return live;
1255 }
1256
1257 static bool
1258 nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
1259 {
1260 nir_instr_worklist *wl = state;
1261
1262 list_del(&src->use_link);
1263 if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
1264 nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);
1265
1266 /* Stop nir_instr_remove from trying to delete the link again. */
1267 src->ssa = NULL;
1268
1269 return true;
1270 }
1271
1272 static void
1273 nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
1274 {
1275 nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
1276 }
1277
1278 /**
1279 * Frees an instruction and any SSA defs that it used that are now dead,
1280 * returning a nir_cursor where the instruction previously was.
1281 */
1282 nir_cursor
1283 nir_instr_free_and_dce(nir_instr *instr)
1284 {
1285 nir_instr_worklist *worklist = nir_instr_worklist_create();
1286
1287 nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
1288 nir_cursor c = nir_instr_remove(instr);
1289
1290 struct exec_list to_free;
1291 exec_list_make_empty(&to_free);
1292
1293 nir_instr *dce_instr;
1294 while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
1295 nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);
1296
1297 /* If we're removing the instr where our cursor is, then we have to
1298 * point the cursor elsewhere.
1299 */
1300 if ((c.option == nir_cursor_before_instr ||
1301 c.option == nir_cursor_after_instr) &&
1302 c.instr == dce_instr)
1303 c = nir_instr_remove(dce_instr);
1304 else
1305 nir_instr_remove(dce_instr);
1306 exec_list_push_tail(&to_free, &dce_instr->node);
1307 }
1308
1309 nir_instr_free_list(&to_free);
1310
1311 nir_instr_worklist_destroy(worklist);
1312
1313 return c;
1314 }
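/* Illustrative use (a sketch; "b", "alu" and "replacement" are assumed): an
 * optimization pass can rewrite the uses of an instruction, then free it
 * together with its now-dead operands and keep building where it used to be:
 *
 *    nir_def_rewrite_uses(&alu->def, replacement);
 *    b->cursor = nir_instr_free_and_dce(&alu->instr);
 */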
1315
1316 /*@}*/
1317
1318 nir_def *
1319 nir_instr_def(nir_instr *instr)
1320 {
1321 switch (instr->type) {
1322 case nir_instr_type_alu:
1323 return &nir_instr_as_alu(instr)->def;
1324
1325 case nir_instr_type_deref:
1326 return &nir_instr_as_deref(instr)->def;
1327
1328 case nir_instr_type_tex:
1329 return &nir_instr_as_tex(instr)->def;
1330
1331 case nir_instr_type_intrinsic: {
1332 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1333 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
1334 return &intrin->def;
1335 } else {
1336 return NULL;
1337 }
1338 }
1339
1340 case nir_instr_type_phi:
1341 return &nir_instr_as_phi(instr)->def;
1342
1343 case nir_instr_type_parallel_copy:
1344 unreachable("Parallel copies are unsupported by this function");
1345
1346 case nir_instr_type_load_const:
1347 return &nir_instr_as_load_const(instr)->def;
1348
1349 case nir_instr_type_undef:
1350 return &nir_instr_as_undef(instr)->def;
1351
1352 case nir_instr_type_call:
1353 case nir_instr_type_jump:
1354 case nir_instr_type_debug_info:
1355 return NULL;
1356 }
1357
1358 unreachable("Invalid instruction type");
1359 }
1360
1361 bool
1362 nir_foreach_phi_src_leaving_block(nir_block *block,
1363 nir_foreach_src_cb cb,
1364 void *state)
1365 {
1366 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
1367 if (block->successors[i] == NULL)
1368 continue;
1369
1370 nir_foreach_phi(phi, block->successors[i]) {
1371 nir_foreach_phi_src(phi_src, phi) {
1372 if (phi_src->pred == block) {
1373 if (!cb(&phi_src->src, state))
1374 return false;
1375 }
1376 }
1377 }
1378 }
1379
1380 return true;
1381 }
1382
1383 nir_const_value
1384 nir_const_value_for_float(double f, unsigned bit_size)
1385 {
1386 nir_const_value v;
1387 memset(&v, 0, sizeof(v));
1388
1389 /* clang-format off */
1390 switch (bit_size) {
1391 case 16: v.u16 = _mesa_float_to_half(f); break;
1392 case 32: v.f32 = f; break;
1393 case 64: v.f64 = f; break;
1394 default: unreachable("Invalid bit size");
1395 }
1396 /* clang-format on */
1397
1398 return v;
1399 }
1400
1401 double
1402 nir_const_value_as_float(nir_const_value value, unsigned bit_size)
1403 {
1404 /* clang-format off */
1405 switch (bit_size) {
1406 case 16: return _mesa_half_to_float(value.u16);
1407 case 32: return value.f32;
1408 case 64: return value.f64;
1409 default: unreachable("Invalid bit size");
1410 }
1411 /* clang-format on */
1412 }
1413
1414 nir_const_value *
1415 nir_src_as_const_value(nir_src src)
1416 {
1417 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1418 return NULL;
1419
1420 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1421
1422 return load->value;
1423 }
1424
1425 const char *
1426 nir_src_as_string(nir_src src)
1427 {
1428 nir_debug_info_instr *di = nir_src_as_debug_info(src);
1429 if (di && di->type == nir_debug_info_string)
1430 return di->string;
1431
1432 return NULL;
1433 }
1434
1435 /**
1436 * Returns true if the source is known to be always uniform. Otherwise it
1437  * returns false, which means it may or may not be uniform but it can't be
1438 * determined.
1439 *
1440 * For a more precise analysis of uniform values, use nir_divergence_analysis.
1441 */
1442 bool
1443 nir_src_is_always_uniform(nir_src src)
1444 {
1445 /* Constants are trivially uniform */
1446 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1447 return true;
1448
1449 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1450 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1451 /* As are uniform variables */
1452 if (intr->intrinsic == nir_intrinsic_load_uniform &&
1453 nir_src_is_always_uniform(intr->src[0]))
1454 return true;
1455 /* From the Vulkan specification 15.6.1. Push Constant Interface:
1456 * "Any member of a push constant block that is declared as an array must
1457 * only be accessed with dynamically uniform indices."
1458 */
1459 if (intr->intrinsic == nir_intrinsic_load_push_constant)
1460 return true;
1461 if (intr->intrinsic == nir_intrinsic_load_deref &&
1462 nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
1463 return true;
1464 }
1465
1466 /* Operating together uniform expressions produces a uniform result */
1467 if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1468 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1469 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1470 if (!nir_src_is_always_uniform(alu->src[i].src))
1471 return false;
1472 }
1473
1474 return true;
1475 }
1476
1477 /* XXX: this could have many more tests, such as when a sampler function is
1478 * called with uniform arguments.
1479 */
1480 return false;
1481 }
1482
1483 nir_block *
1484 nir_src_get_block(nir_src *src)
1485 {
1486 if (nir_src_is_if(src))
1487 return nir_cf_node_cf_tree_prev(&nir_src_parent_if(src)->cf_node);
1488 else if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
1489 return list_entry(src, nir_phi_src, src)->pred;
1490 else
1491 return nir_src_parent_instr(src)->block;
1492 }
1493
1494 static void
1495 src_remove_all_uses(nir_src *src)
1496 {
1497 if (src && src_is_valid(src))
1498 list_del(&src->use_link);
1499 }
1500
1501 static void
1502 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1503 {
1504 if (!src)
1505 return;
1506
1507 if (!src_is_valid(src))
1508 return;
1509
1510 if (parent_instr) {
1511 nir_src_set_parent_instr(src, parent_instr);
1512 } else {
1513 assert(parent_if);
1514 nir_src_set_parent_if(src, parent_if);
1515 }
1516
1517 list_addtail(&src->use_link, &src->ssa->uses);
1518 }
1519
1520 void
1521 nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
1522 {
1523 *src = nir_src_for_ssa(def);
1524 src_add_all_uses(src, instr, NULL);
1525 }
1526
1527 void
1528 nir_instr_clear_src(nir_instr *instr, nir_src *src)
1529 {
1530 src_remove_all_uses(src);
1531 *src = NIR_SRC_INIT;
1532 }
1533
1534 void
1535 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1536 {
1537 assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);
1538
1539 src_remove_all_uses(dest);
1540 src_remove_all_uses(src);
1541 *dest = *src;
1542 *src = NIR_SRC_INIT;
1543 src_add_all_uses(dest, dest_instr, NULL);
1544 }
1545
1546 void
1547 nir_def_init(nir_instr *instr, nir_def *def,
1548 unsigned num_components,
1549 unsigned bit_size)
1550 {
1551 def->parent_instr = instr;
1552 list_inithead(&def->uses);
1553 def->num_components = num_components;
1554 def->bit_size = bit_size;
1555 def->divergent = true; /* This is the safer default */
1556
1557 if (instr->block) {
1558 nir_function_impl *impl =
1559 nir_cf_node_get_function(&instr->block->cf_node);
1560
1561 def->index = impl->ssa_alloc++;
1562
1563 impl->valid_metadata &= ~nir_metadata_live_defs;
1564 } else {
1565 def->index = UINT_MAX;
1566 }
1567 }
1568
1569 void
1570 nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
1571 {
1572 assert(def != new_ssa);
1573 nir_foreach_use_including_if_safe(use_src, def) {
1574 nir_src_rewrite(use_src, new_ssa);
1575 }
1576 }
1577
1578 void
1579 nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1580 {
1581 nir_def_rewrite_uses(def, new_src.ssa);
1582 }
1583
1584 static bool
1585 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1586 {
1587 assert(start->block == end->block);
1588
1589 if (between->block != start->block)
1590 return false;
1591
1592 /* Search backwards looking for "between" */
1593 while (start != end) {
1594 if (between == end)
1595 return true;
1596
1597 end = nir_instr_prev(end);
1598 assert(end);
1599 }
1600
1601 return false;
1602 }
1603
1604 /* Replaces all uses of the given SSA def with the given source but only if
1605 * the use comes after the after_me instruction. This can be useful if you
1606 * are emitting code to fix up the result of some instruction: you can freely
1607 * use the result in that code and then call rewrite_uses_after and pass the
1608 * last fixup instruction as after_me and it will replace all of the uses you
1609 * want without touching the fixup code.
1610 *
1611 * This function assumes that after_me is in the same block as
1612 * def->parent_instr and that after_me comes after def->parent_instr.
1613 */
1614 void
1615 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1616 nir_instr *after_me)
1617 {
1618 if (def == new_ssa)
1619 return;
1620
1621 nir_foreach_use_including_if_safe(use_src, def) {
1622 if (!nir_src_is_if(use_src)) {
1623 assert(nir_src_parent_instr(use_src) != def->parent_instr);
1624
1625 /* Since def already dominates all of its uses, the only way a use can
1626 * not be dominated by after_me is if it is between def and after_me in
1627 * the instruction list.
1628 */
1629 if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1630 continue;
1631 }
1632
1633 nir_src_rewrite(use_src, new_ssa);
1634 }
1635 }
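/* Illustrative use (a sketch; "b", "def" and "offset" are assumed): emit
 * fixup code that reads "def", then redirect every later use to the fixed-up
 * value without touching the fixup code itself:
 *
 *    nir_def *fixed = nir_fadd(b, def, offset);
 *    nir_def_rewrite_uses_after(def, fixed, fixed->parent_instr);
 */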
1636
1637 static nir_def *
1638 get_store_value(nir_intrinsic_instr *intrin)
1639 {
1640 assert(nir_intrinsic_has_write_mask(intrin));
1641 /* deref stores have the deref in src[0] and the store value in src[1] */
1642 if (intrin->intrinsic == nir_intrinsic_store_deref ||
1643 intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1644 return intrin->src[1].ssa;
1645
1646 /* all other stores have the store value in src[0] */
1647 return intrin->src[0].ssa;
1648 }
1649
1650 nir_component_mask_t
1651 nir_src_components_read(const nir_src *src)
1652 {
1653 assert(nir_src_parent_instr(src));
1654
1655 if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1656 nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1657 nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1658 int src_idx = alu_src - &alu->src[0];
1659 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1660 return nir_alu_instr_src_read_mask(alu, src_idx);
1661 } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1662 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1663 if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1664 return nir_intrinsic_write_mask(intrin);
1665 else
1666 return (1 << src->ssa->num_components) - 1;
1667 } else {
1668 return (1 << src->ssa->num_components) - 1;
1669 }
1670 }
1671
1672 nir_component_mask_t
1673 nir_def_components_read(const nir_def *def)
1674 {
1675 nir_component_mask_t read_mask = 0;
1676
1677 nir_foreach_use_including_if(use, def) {
1678 read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1679
1680 if (read_mask == (1 << def->num_components) - 1)
1681 return read_mask;
1682 }
1683
1684 return read_mask;
1685 }
1686
1687 bool
1688 nir_def_all_uses_are_fsat(const nir_def *def)
1689 {
1690 nir_foreach_use(src, def) {
1691 if (nir_src_is_if(src))
1692 return false;
1693
1694 nir_instr *use = nir_src_parent_instr(src);
1695 if (use->type != nir_instr_type_alu)
1696 return false;
1697
1698 nir_alu_instr *alu = nir_instr_as_alu(use);
1699 if (alu->op != nir_op_fsat)
1700 return false;
1701 }
1702
1703 return true;
1704 }
1705
1706 nir_block *
1707 nir_block_unstructured_next(nir_block *block)
1708 {
1709 if (block == NULL) {
1710 /* nir_foreach_block_unstructured_safe() will call this function on a
1711 * NULL block after the last iteration, but it won't use the result so
1712 * just return NULL here.
1713 */
1714 return NULL;
1715 }
1716
1717 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1718 if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1719 return NULL;
1720
1721 if (cf_next && cf_next->type == nir_cf_node_block)
1722 return nir_cf_node_as_block(cf_next);
1723
1724 return nir_block_cf_tree_next(block);
1725 }
1726
1727 nir_block *
1728 nir_unstructured_start_block(nir_function_impl *impl)
1729 {
1730 return nir_start_block(impl);
1731 }
1732
1733 nir_block *
1734 nir_block_cf_tree_next(nir_block *block)
1735 {
1736 if (block == NULL) {
1737 /* nir_foreach_block_safe() will call this function on a NULL block
1738 * after the last iteration, but it won't use the result so just return
1739 * NULL here.
1740 */
1741 return NULL;
1742 }
1743
1744 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1745
1746 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1747 if (cf_next)
1748 return nir_cf_node_cf_tree_first(cf_next);
1749
1750 nir_cf_node *parent = block->cf_node.parent;
1751 if (parent->type == nir_cf_node_function)
1752 return NULL;
1753
1754 /* Is this the last block of a cf_node? Return the following block */
1755 if (block == nir_cf_node_cf_tree_last(parent))
1756 return nir_cf_node_as_block(nir_cf_node_next(parent));
1757
1758 switch (parent->type) {
1759 case nir_cf_node_if: {
1760 /* We are at the end of the if. Go to the beginning of the else */
1761 nir_if *if_stmt = nir_cf_node_as_if(parent);
1762 assert(block == nir_if_last_then_block(if_stmt));
1763 return nir_if_first_else_block(if_stmt);
1764 }
1765
1766 case nir_cf_node_loop: {
1767 /* We are at the end of the body and there is a continue construct */
1768 nir_loop *loop = nir_cf_node_as_loop(parent);
1769 assert(block == nir_loop_last_block(loop) &&
1770 nir_loop_has_continue_construct(loop));
1771 return nir_loop_first_continue_block(loop);
1772 }
1773
1774 default:
1775 unreachable("unknown cf node type");
1776 }
1777 }
1778
1779 nir_block *
1780 nir_block_cf_tree_prev(nir_block *block)
1781 {
1782 if (block == NULL) {
1783 /* do this for consistency with nir_block_cf_tree_next() */
1784 return NULL;
1785 }
1786
1787 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1788
1789 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1790 if (cf_prev)
1791 return nir_cf_node_cf_tree_last(cf_prev);
1792
1793 nir_cf_node *parent = block->cf_node.parent;
1794 if (parent->type == nir_cf_node_function)
1795 return NULL;
1796
1797 /* Is this the first block of a cf_node? Return the previous block */
1798 if (block == nir_cf_node_cf_tree_first(parent))
1799 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1800
1801 switch (parent->type) {
1802 case nir_cf_node_if: {
1803 /* We are at the beginning of the else. Go to the end of the if */
1804 nir_if *if_stmt = nir_cf_node_as_if(parent);
1805 assert(block == nir_if_first_else_block(if_stmt));
1806 return nir_if_last_then_block(if_stmt);
1807 }
1808 case nir_cf_node_loop: {
1809 /* We are at the beginning of the continue construct. */
1810 nir_loop *loop = nir_cf_node_as_loop(parent);
1811 assert(nir_loop_has_continue_construct(loop) &&
1812 block == nir_loop_first_continue_block(loop));
1813 return nir_loop_last_block(loop);
1814 }
1815
1816 default:
1817 unreachable("unknown cf node type");
1818 }
1819 }
1820
1821 nir_block *
1822 nir_cf_node_cf_tree_first(nir_cf_node *node)
1823 {
1824 switch (node->type) {
1825 case nir_cf_node_function: {
1826 nir_function_impl *impl = nir_cf_node_as_function(node);
1827 return nir_start_block(impl);
1828 }
1829
1830 case nir_cf_node_if: {
1831 nir_if *if_stmt = nir_cf_node_as_if(node);
1832 return nir_if_first_then_block(if_stmt);
1833 }
1834
1835 case nir_cf_node_loop: {
1836 nir_loop *loop = nir_cf_node_as_loop(node);
1837 return nir_loop_first_block(loop);
1838 }
1839
1840 case nir_cf_node_block: {
1841 return nir_cf_node_as_block(node);
1842 }
1843
1844 default:
1845 unreachable("unknown node type");
1846 }
1847 }
1848
1849 nir_block *
1850 nir_cf_node_cf_tree_last(nir_cf_node *node)
1851 {
1852 switch (node->type) {
1853 case nir_cf_node_function: {
1854 nir_function_impl *impl = nir_cf_node_as_function(node);
1855 return nir_impl_last_block(impl);
1856 }
1857
1858 case nir_cf_node_if: {
1859 nir_if *if_stmt = nir_cf_node_as_if(node);
1860 return nir_if_last_else_block(if_stmt);
1861 }
1862
1863 case nir_cf_node_loop: {
1864 nir_loop *loop = nir_cf_node_as_loop(node);
1865 if (nir_loop_has_continue_construct(loop))
1866 return nir_loop_last_continue_block(loop);
1867 else
1868 return nir_loop_last_block(loop);
1869 }
1870
1871 case nir_cf_node_block: {
1872 return nir_cf_node_as_block(node);
1873 }
1874
1875 default:
1876 unreachable("unknown node type");
1877 }
1878 }
1879
1880 nir_block *
1881 nir_cf_node_cf_tree_next(nir_cf_node *node)
1882 {
1883 if (node->type == nir_cf_node_block)
1884 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1885 else if (node->type == nir_cf_node_function)
1886 return NULL;
1887 else
1888 return nir_cf_node_as_block(nir_cf_node_next(node));
1889 }
1890
1891 nir_block *
1892 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1893 {
1894 if (node->type == nir_cf_node_block)
1895 return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1896 else if (node->type == nir_cf_node_function)
1897 return NULL;
1898 else
1899 return nir_cf_node_as_block(nir_cf_node_prev(node));
1900 }
1901
1902 nir_if *
1903 nir_block_get_following_if(nir_block *block)
1904 {
1905 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1906 return NULL;
1907
1908 if (nir_cf_node_is_last(&block->cf_node))
1909 return NULL;
1910
1911 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1912
1913 if (next_node->type != nir_cf_node_if)
1914 return NULL;
1915
1916 return nir_cf_node_as_if(next_node);
1917 }
1918
1919 nir_loop *
1920 nir_block_get_following_loop(nir_block *block)
1921 {
1922 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1923 return NULL;
1924
1925 if (nir_cf_node_is_last(&block->cf_node))
1926 return NULL;
1927
1928 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1929
1930 if (next_node->type != nir_cf_node_loop)
1931 return NULL;
1932
1933 return nir_cf_node_as_loop(next_node);
1934 }
1935
1936 static int
1937 compare_block_index(const void *p1, const void *p2)
1938 {
1939 const nir_block *block1 = *((const nir_block **)p1);
1940 const nir_block *block2 = *((const nir_block **)p2);
1941
1942 return (int)block1->index - (int)block2->index;
1943 }
1944
1945 nir_block **
1946 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1947 {
1948 nir_block **preds =
1949 ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1950
1951 unsigned i = 0;
1952 set_foreach(block->predecessors, entry)
1953 preds[i++] = (nir_block *)entry->key;
1954 assert(i == block->predecessors->entries);
1955
1956 qsort(preds, block->predecessors->entries, sizeof(nir_block *),
1957 compare_block_index);
1958
1959 return preds;
1960 }
1961
1962 void
1963 nir_index_blocks(nir_function_impl *impl)
1964 {
1965 unsigned index = 0;
1966
1967 if (impl->valid_metadata & nir_metadata_block_index)
1968 return;
1969
1970 nir_foreach_block_unstructured(block, impl) {
1971 block->index = index++;
1972 }
1973
1974 /* The end_block isn't really part of the program, which is why its index
1975 * is >= num_blocks.
1976 */
1977 impl->num_blocks = impl->end_block->index = index;
1978 }
1979
1980 static bool
1981 index_ssa_def_cb(nir_def *def, void *state)
1982 {
1983 unsigned *index = (unsigned *)state;
1984 def->index = (*index)++;
1985
1986 return true;
1987 }
1988
1989 /**
1990 * The indices are applied top-to-bottom which has the very nice property
1991 * that, if A dominates B, then A->index <= B->index.
1992 */
1993 void
1994 nir_index_ssa_defs(nir_function_impl *impl)
1995 {
1996 unsigned index = 0;
1997
1998 impl->valid_metadata &= ~nir_metadata_live_defs;
1999
2000 nir_foreach_block_unstructured(block, impl) {
2001 nir_foreach_instr(instr, block)
2002 nir_foreach_def(instr, index_ssa_def_cb, &index);
2003 }
2004
2005 impl->ssa_alloc = index;
2006 }
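/* Illustrative consequence (not part of the API): after nir_index_ssa_defs(),
 * the indices give a cheap necessary condition for dominance:
 *
 *    nir_index_ssa_defs(impl);
 *    bool b_cannot_dominate_a = a->index < b->index;
 *
 * since a dominating def always carries the smaller (or equal) index.
 */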
2007
2008 /**
2009 * The indices are applied top-to-bottom which has the very nice property
2010 * that, if A dominates B, then A->index <= B->index.
2011 */
2012 unsigned
2013 nir_index_instrs(nir_function_impl *impl)
2014 {
2015 unsigned index = 0;
2016
2017 nir_foreach_block(block, impl) {
2018 block->start_ip = index++;
2019
2020 nir_foreach_instr(instr, block)
2021 instr->index = index++;
2022
2023 block->end_ip = index++;
2024 }
2025
2026 return index;
2027 }
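/* Illustrative consequence (not part of the API): with this numbering, two
 * instructions can be ordered by comparing their indices, and a block's
 * start_ip/end_ip bracket every instruction it contains:
 *
 *    nir_index_instrs(impl);
 *    bool a_before_b = a->index < b->index;
 *    bool in_block   = block->start_ip < instr->index &&
 *                      instr->index < block->end_ip;
 */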
2028
2029 void
2030 nir_shader_clear_pass_flags(nir_shader *shader)
2031 {
2032 nir_foreach_function_impl(impl, shader) {
2033 nir_foreach_block(block, impl) {
2034 nir_foreach_instr(instr, block) {
2035 instr->pass_flags = 0;
2036 }
2037 }
2038 }
2039 }
2040
2041 unsigned
2042 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2043 {
2044 unsigned count = 0;
2045 nir_foreach_variable_with_modes(var, shader, modes)
2046 var->index = count++;
2047 return count;
2048 }
2049
2050 unsigned
2051 nir_function_impl_index_vars(nir_function_impl *impl)
2052 {
2053 unsigned count = 0;
2054 nir_foreach_function_temp_variable(var, impl)
2055 var->index = count++;
2056 return count;
2057 }
2058
2059 static nir_instr *
2060 cursor_next_instr(nir_cursor cursor)
2061 {
2062 switch (cursor.option) {
2063 case nir_cursor_before_block:
2064 for (nir_block *block = cursor.block; block;
2065 block = nir_block_cf_tree_next(block)) {
2066 nir_instr *instr = nir_block_first_instr(block);
2067 if (instr)
2068 return instr;
2069 }
2070 return NULL;
2071
2072 case nir_cursor_after_block:
2073 cursor.block = nir_block_cf_tree_next(cursor.block);
2074 if (cursor.block == NULL)
2075 return NULL;
2076
2077 cursor.option = nir_cursor_before_block;
2078 return cursor_next_instr(cursor);
2079
2080 case nir_cursor_before_instr:
2081 return cursor.instr;
2082
2083 case nir_cursor_after_instr:
2084 if (nir_instr_next(cursor.instr))
2085 return nir_instr_next(cursor.instr);
2086
2087 cursor.option = nir_cursor_after_block;
2088 cursor.block = cursor.instr->block;
2089 return cursor_next_instr(cursor);
2090 }
2091
2092    unreachable("Invalid cursor option");
2093 }
2094
2095 bool
2096 nir_function_impl_lower_instructions(nir_function_impl *impl,
2097 nir_instr_filter_cb filter,
2098 nir_lower_instr_cb lower,
2099 void *cb_data)
2100 {
2101 nir_builder b = nir_builder_create(impl);
2102
2103 nir_metadata preserved = nir_metadata_control_flow;
2104
2105 bool progress = false;
2106 nir_cursor iter = nir_before_impl(impl);
2107 nir_instr *instr;
2108 while ((instr = cursor_next_instr(iter)) != NULL) {
2109 if (filter && !filter(instr, cb_data)) {
2110 iter = nir_after_instr(instr);
2111 continue;
2112 }
2113
2114 nir_def *old_def = nir_instr_def(instr);
2115 struct list_head old_uses;
2116 if (old_def != NULL) {
2117 /* We're about to ask the callback to generate a replacement for instr.
2118 * Save off the uses from instr's SSA def so we know what uses to
2119 * rewrite later. If we use nir_def_rewrite_uses, it fails in the
2120 * case where the generated replacement code uses the result of instr
2121 * itself. If we use nir_def_rewrite_uses_after (which is the
2122 * normal solution to this problem), it doesn't work well if control-
2123 * flow is inserted as part of the replacement, doesn't handle cases
2124 * where the replacement is something consumed by instr, and suffers
2125 * from performance issues. This is the only way to 100% guarantee
2126 * that we rewrite the correct set efficiently.
2127 */
2128
2129 list_replace(&old_def->uses, &old_uses);
2130 list_inithead(&old_def->uses);
2131 }
2132
2133 b.cursor = nir_after_instr(instr);
2134 nir_def *new_def = lower(&b, instr, cb_data);
2135 if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2136 new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2137 assert(old_def != NULL);
2138 if (new_def->parent_instr->block != instr->block)
2139 preserved = nir_metadata_none;
2140
2141 list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2142 nir_src_rewrite(use_src, new_def);
2143
2144 if (nir_def_is_unused(old_def)) {
2145 iter = nir_instr_free_and_dce(instr);
2146 } else {
2147 iter = nir_after_instr(instr);
2148 }
2149 progress = true;
2150 } else {
2151 /* We didn't end up lowering after all. Put the uses back */
2152 if (old_def)
2153 list_replace(&old_uses, &old_def->uses);
2154
2155 if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2156 /* Only instructions without a return value can be removed like this */
2157 assert(!old_def);
2158 iter = nir_instr_free_and_dce(instr);
2159 progress = true;
2160 } else
2161 iter = nir_after_instr(instr);
2162
2163 if (new_def == NIR_LOWER_INSTR_PROGRESS)
2164 progress = true;
2165 }
2166 }
2167
2168 if (progress) {
2169 nir_metadata_preserve(impl, preserved);
2170 } else {
2171 nir_metadata_preserve(impl, nir_metadata_all);
2172 }
2173
2174 return progress;
2175 }
2176
2177 bool
2178 nir_shader_lower_instructions(nir_shader *shader,
2179 nir_instr_filter_cb filter,
2180 nir_lower_instr_cb lower,
2181 void *cb_data)
2182 {
2183 bool progress = false;
2184
2185 nir_foreach_function_impl(impl, shader) {
2186 if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2187 progress = true;
2188 }
2189
2190 return progress;
2191 }
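/* Usage sketch (illustrative; the filter/lower callbacks and the fneg
 * lowering are assumptions, not taken from this file). A typical pass pairs a
 * filter with a lower callback and lets the helpers above handle use
 * rewriting and metadata:
 *
 *    static bool
 *    filter_cb(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fneg;
 *    }
 *
 *    static nir_def *
 *    lower_cb(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       nir_def *src = nir_mov_alu(b, alu->src[0], alu->def.num_components);
 *       return nir_fmul_imm(b, src, -1.0);
 *    }
 *
 *    bool progress =
 *       nir_shader_lower_instructions(shader, filter_cb, lower_cb, NULL);
 */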
2192
2193 /**
2194 * Returns true if the shader supports quad-based implicit derivatives on
2195 * texture sampling.
2196 */
2197 bool
2198 nir_shader_supports_implicit_lod(nir_shader *shader)
2199 {
2200 return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2201 (gl_shader_stage_uses_workgroup(shader->info.stage) &&
2202 shader->info.derivative_group != DERIVATIVE_GROUP_NONE));
2203 }
2204
2205 nir_intrinsic_op
2206 nir_intrinsic_from_system_value(gl_system_value val)
2207 {
2208 switch (val) {
2209 case SYSTEM_VALUE_VERTEX_ID:
2210 return nir_intrinsic_load_vertex_id;
2211 case SYSTEM_VALUE_INSTANCE_ID:
2212 return nir_intrinsic_load_instance_id;
2213 case SYSTEM_VALUE_DRAW_ID:
2214 return nir_intrinsic_load_draw_id;
2215 case SYSTEM_VALUE_BASE_INSTANCE:
2216 return nir_intrinsic_load_base_instance;
2217 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2218 return nir_intrinsic_load_vertex_id_zero_base;
2219 case SYSTEM_VALUE_IS_INDEXED_DRAW:
2220 return nir_intrinsic_load_is_indexed_draw;
2221 case SYSTEM_VALUE_FIRST_VERTEX:
2222 return nir_intrinsic_load_first_vertex;
2223 case SYSTEM_VALUE_BASE_VERTEX:
2224 return nir_intrinsic_load_base_vertex;
2225 case SYSTEM_VALUE_INVOCATION_ID:
2226 return nir_intrinsic_load_invocation_id;
2227 case SYSTEM_VALUE_FRAG_COORD:
2228 return nir_intrinsic_load_frag_coord;
2229 case SYSTEM_VALUE_POINT_COORD:
2230 return nir_intrinsic_load_point_coord;
2231 case SYSTEM_VALUE_LINE_COORD:
2232 return nir_intrinsic_load_line_coord;
2233 case SYSTEM_VALUE_FRONT_FACE:
2234 return nir_intrinsic_load_front_face;
2235 case SYSTEM_VALUE_SAMPLE_ID:
2236 return nir_intrinsic_load_sample_id;
2237 case SYSTEM_VALUE_SAMPLE_POS:
2238 return nir_intrinsic_load_sample_pos;
2239 case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2240 return nir_intrinsic_load_sample_pos_or_center;
2241 case SYSTEM_VALUE_SAMPLE_MASK_IN:
2242 return nir_intrinsic_load_sample_mask_in;
2243 case SYSTEM_VALUE_LAYER_ID:
2244 return nir_intrinsic_load_layer_id;
2245 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2246 return nir_intrinsic_load_local_invocation_id;
2247 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2248 return nir_intrinsic_load_local_invocation_index;
2249 case SYSTEM_VALUE_WORKGROUP_ID:
2250 return nir_intrinsic_load_workgroup_id;
2251 case SYSTEM_VALUE_BASE_WORKGROUP_ID:
2252 return nir_intrinsic_load_base_workgroup_id;
2253 case SYSTEM_VALUE_WORKGROUP_INDEX:
2254 return nir_intrinsic_load_workgroup_index;
2255 case SYSTEM_VALUE_NUM_WORKGROUPS:
2256 return nir_intrinsic_load_num_workgroups;
2257 case SYSTEM_VALUE_PRIMITIVE_ID:
2258 return nir_intrinsic_load_primitive_id;
2259 case SYSTEM_VALUE_TESS_COORD:
2260 return nir_intrinsic_load_tess_coord;
2261 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2262 return nir_intrinsic_load_tess_level_outer;
2263 case SYSTEM_VALUE_TESS_LEVEL_INNER:
2264 return nir_intrinsic_load_tess_level_inner;
2265 case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2266 return nir_intrinsic_load_tess_level_outer_default;
2267 case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2268 return nir_intrinsic_load_tess_level_inner_default;
2269 case SYSTEM_VALUE_VERTICES_IN:
2270 return nir_intrinsic_load_patch_vertices_in;
2271 case SYSTEM_VALUE_HELPER_INVOCATION:
2272 return nir_intrinsic_load_helper_invocation;
2273 case SYSTEM_VALUE_COLOR0:
2274 return nir_intrinsic_load_color0;
2275 case SYSTEM_VALUE_COLOR1:
2276 return nir_intrinsic_load_color1;
2277 case SYSTEM_VALUE_VIEW_INDEX:
2278 return nir_intrinsic_load_view_index;
2279 case SYSTEM_VALUE_SUBGROUP_SIZE:
2280 return nir_intrinsic_load_subgroup_size;
2281 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2282 return nir_intrinsic_load_subgroup_invocation;
2283 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2284 return nir_intrinsic_load_subgroup_eq_mask;
2285 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2286 return nir_intrinsic_load_subgroup_ge_mask;
2287 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2288 return nir_intrinsic_load_subgroup_gt_mask;
2289 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2290 return nir_intrinsic_load_subgroup_le_mask;
2291 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2292 return nir_intrinsic_load_subgroup_lt_mask;
2293 case SYSTEM_VALUE_NUM_SUBGROUPS:
2294 return nir_intrinsic_load_num_subgroups;
2295 case SYSTEM_VALUE_SUBGROUP_ID:
2296 return nir_intrinsic_load_subgroup_id;
2297 case SYSTEM_VALUE_WORKGROUP_SIZE:
2298 return nir_intrinsic_load_workgroup_size;
2299 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2300 return nir_intrinsic_load_global_invocation_id;
2301 case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2302 return nir_intrinsic_load_base_global_invocation_id;
2303 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2304 return nir_intrinsic_load_global_invocation_index;
2305 case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
2306 return nir_intrinsic_load_global_size;
2307 case SYSTEM_VALUE_WORK_DIM:
2308 return nir_intrinsic_load_work_dim;
2309 case SYSTEM_VALUE_USER_DATA_AMD:
2310 return nir_intrinsic_load_user_data_amd;
2311 case SYSTEM_VALUE_RAY_LAUNCH_ID:
2312 return nir_intrinsic_load_ray_launch_id;
2313 case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2314 return nir_intrinsic_load_ray_launch_size;
2315 case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2316 return nir_intrinsic_load_ray_world_origin;
2317 case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2318 return nir_intrinsic_load_ray_world_direction;
2319 case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2320 return nir_intrinsic_load_ray_object_origin;
2321 case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2322 return nir_intrinsic_load_ray_object_direction;
2323 case SYSTEM_VALUE_RAY_T_MIN:
2324 return nir_intrinsic_load_ray_t_min;
2325 case SYSTEM_VALUE_RAY_T_MAX:
2326 return nir_intrinsic_load_ray_t_max;
2327 case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2328 return nir_intrinsic_load_ray_object_to_world;
2329 case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2330 return nir_intrinsic_load_ray_world_to_object;
2331 case SYSTEM_VALUE_RAY_HIT_KIND:
2332 return nir_intrinsic_load_ray_hit_kind;
2333 case SYSTEM_VALUE_RAY_FLAGS:
2334 return nir_intrinsic_load_ray_flags;
2335 case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2336 return nir_intrinsic_load_ray_geometry_index;
2337 case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2338 return nir_intrinsic_load_ray_instance_custom_index;
2339 case SYSTEM_VALUE_CULL_MASK:
2340 return nir_intrinsic_load_cull_mask;
2341 case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2342 return nir_intrinsic_load_ray_triangle_vertex_positions;
2343 case SYSTEM_VALUE_MESH_VIEW_COUNT:
2344 return nir_intrinsic_load_mesh_view_count;
2345 case SYSTEM_VALUE_FRAG_SHADING_RATE:
2346 return nir_intrinsic_load_frag_shading_rate;
2347 case SYSTEM_VALUE_FULLY_COVERED:
2348 return nir_intrinsic_load_fully_covered;
2349 case SYSTEM_VALUE_FRAG_SIZE:
2350 return nir_intrinsic_load_frag_size;
2351 case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2352 return nir_intrinsic_load_frag_invocation_count;
2353 case SYSTEM_VALUE_SHADER_INDEX:
2354 return nir_intrinsic_load_shader_index;
2355 case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2356 return nir_intrinsic_load_coalesced_input_count;
2357 case SYSTEM_VALUE_WARPS_PER_SM_NV:
2358 return nir_intrinsic_load_warps_per_sm_nv;
2359 case SYSTEM_VALUE_SM_COUNT_NV:
2360 return nir_intrinsic_load_sm_count_nv;
2361 case SYSTEM_VALUE_WARP_ID_NV:
2362 return nir_intrinsic_load_warp_id_nv;
2363 case SYSTEM_VALUE_SM_ID_NV:
2364 return nir_intrinsic_load_sm_id_nv;
2365 default:
2366 unreachable("system value does not directly correspond to intrinsic");
2367 }
2368 }
2369
2370 gl_system_value
2371 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2372 {
2373 switch (intrin) {
2374 case nir_intrinsic_load_vertex_id:
2375 return SYSTEM_VALUE_VERTEX_ID;
2376 case nir_intrinsic_load_instance_id:
2377 return SYSTEM_VALUE_INSTANCE_ID;
2378 case nir_intrinsic_load_draw_id:
2379 return SYSTEM_VALUE_DRAW_ID;
2380 case nir_intrinsic_load_base_instance:
2381 return SYSTEM_VALUE_BASE_INSTANCE;
2382 case nir_intrinsic_load_vertex_id_zero_base:
2383 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2384 case nir_intrinsic_load_first_vertex:
2385 return SYSTEM_VALUE_FIRST_VERTEX;
2386 case nir_intrinsic_load_is_indexed_draw:
2387 return SYSTEM_VALUE_IS_INDEXED_DRAW;
2388 case nir_intrinsic_load_base_vertex:
2389 return SYSTEM_VALUE_BASE_VERTEX;
2390 case nir_intrinsic_load_invocation_id:
2391 return SYSTEM_VALUE_INVOCATION_ID;
2392 case nir_intrinsic_load_frag_coord:
2393 return SYSTEM_VALUE_FRAG_COORD;
2394 case nir_intrinsic_load_point_coord:
2395 return SYSTEM_VALUE_POINT_COORD;
2396 case nir_intrinsic_load_line_coord:
2397 return SYSTEM_VALUE_LINE_COORD;
2398 case nir_intrinsic_load_front_face:
2399 return SYSTEM_VALUE_FRONT_FACE;
2400 case nir_intrinsic_load_sample_id:
2401 return SYSTEM_VALUE_SAMPLE_ID;
2402 case nir_intrinsic_load_sample_pos:
2403 return SYSTEM_VALUE_SAMPLE_POS;
2404 case nir_intrinsic_load_sample_pos_or_center:
2405 return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2406 case nir_intrinsic_load_sample_mask_in:
2407 return SYSTEM_VALUE_SAMPLE_MASK_IN;
2408 case nir_intrinsic_load_layer_id:
2409 return SYSTEM_VALUE_LAYER_ID;
2410 case nir_intrinsic_load_local_invocation_id:
2411 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2412 case nir_intrinsic_load_local_invocation_index:
2413 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2414 case nir_intrinsic_load_num_workgroups:
2415 return SYSTEM_VALUE_NUM_WORKGROUPS;
2416 case nir_intrinsic_load_workgroup_id:
2417 return SYSTEM_VALUE_WORKGROUP_ID;
2418 case nir_intrinsic_load_base_workgroup_id:
2419 return SYSTEM_VALUE_BASE_WORKGROUP_ID;
2420 case nir_intrinsic_load_workgroup_index:
2421 return SYSTEM_VALUE_WORKGROUP_INDEX;
2422 case nir_intrinsic_load_primitive_id:
2423 return SYSTEM_VALUE_PRIMITIVE_ID;
2424 case nir_intrinsic_load_tess_coord:
2425 case nir_intrinsic_load_tess_coord_xy:
2426 return SYSTEM_VALUE_TESS_COORD;
2427 case nir_intrinsic_load_tess_level_outer:
2428 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2429 case nir_intrinsic_load_tess_level_inner:
2430 return SYSTEM_VALUE_TESS_LEVEL_INNER;
2431 case nir_intrinsic_load_tess_level_outer_default:
2432 return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2433 case nir_intrinsic_load_tess_level_inner_default:
2434 return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2435 case nir_intrinsic_load_patch_vertices_in:
2436 return SYSTEM_VALUE_VERTICES_IN;
2437 case nir_intrinsic_load_helper_invocation:
2438 return SYSTEM_VALUE_HELPER_INVOCATION;
2439 case nir_intrinsic_load_color0:
2440 return SYSTEM_VALUE_COLOR0;
2441 case nir_intrinsic_load_color1:
2442 return SYSTEM_VALUE_COLOR1;
2443 case nir_intrinsic_load_view_index:
2444 return SYSTEM_VALUE_VIEW_INDEX;
2445 case nir_intrinsic_load_subgroup_size:
2446 return SYSTEM_VALUE_SUBGROUP_SIZE;
2447 case nir_intrinsic_load_subgroup_invocation:
2448 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2449 case nir_intrinsic_load_subgroup_eq_mask:
2450 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2451 case nir_intrinsic_load_subgroup_ge_mask:
2452 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2453 case nir_intrinsic_load_subgroup_gt_mask:
2454 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2455 case nir_intrinsic_load_subgroup_le_mask:
2456 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2457 case nir_intrinsic_load_subgroup_lt_mask:
2458 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2459 case nir_intrinsic_load_num_subgroups:
2460 return SYSTEM_VALUE_NUM_SUBGROUPS;
2461 case nir_intrinsic_load_subgroup_id:
2462 return SYSTEM_VALUE_SUBGROUP_ID;
2463 case nir_intrinsic_load_workgroup_size:
2464 return SYSTEM_VALUE_WORKGROUP_SIZE;
2465 case nir_intrinsic_load_global_invocation_id:
2466 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2467 case nir_intrinsic_load_base_global_invocation_id:
2468 return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2469 case nir_intrinsic_load_global_invocation_index:
2470 return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2471 case nir_intrinsic_load_global_size:
2472 return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
2473 case nir_intrinsic_load_work_dim:
2474 return SYSTEM_VALUE_WORK_DIM;
2475 case nir_intrinsic_load_user_data_amd:
2476 return SYSTEM_VALUE_USER_DATA_AMD;
2477 case nir_intrinsic_load_barycentric_model:
2478 return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2479 case nir_intrinsic_load_gs_header_ir3:
2480 return SYSTEM_VALUE_GS_HEADER_IR3;
2481 case nir_intrinsic_load_tcs_header_ir3:
2482 return SYSTEM_VALUE_TCS_HEADER_IR3;
2483 case nir_intrinsic_load_ray_launch_id:
2484 return SYSTEM_VALUE_RAY_LAUNCH_ID;
2485 case nir_intrinsic_load_ray_launch_size:
2486 return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2487 case nir_intrinsic_load_ray_world_origin:
2488 return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2489 case nir_intrinsic_load_ray_world_direction:
2490 return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2491 case nir_intrinsic_load_ray_object_origin:
2492 return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2493 case nir_intrinsic_load_ray_object_direction:
2494 return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2495 case nir_intrinsic_load_ray_t_min:
2496 return SYSTEM_VALUE_RAY_T_MIN;
2497 case nir_intrinsic_load_ray_t_max:
2498 return SYSTEM_VALUE_RAY_T_MAX;
2499 case nir_intrinsic_load_ray_object_to_world:
2500 return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2501 case nir_intrinsic_load_ray_world_to_object:
2502 return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2503 case nir_intrinsic_load_ray_hit_kind:
2504 return SYSTEM_VALUE_RAY_HIT_KIND;
2505 case nir_intrinsic_load_ray_flags:
2506 return SYSTEM_VALUE_RAY_FLAGS;
2507 case nir_intrinsic_load_ray_geometry_index:
2508 return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2509 case nir_intrinsic_load_ray_instance_custom_index:
2510 return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2511 case nir_intrinsic_load_cull_mask:
2512 return SYSTEM_VALUE_CULL_MASK;
2513 case nir_intrinsic_load_ray_triangle_vertex_positions:
2514 return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2515 case nir_intrinsic_load_frag_shading_rate:
2516 return SYSTEM_VALUE_FRAG_SHADING_RATE;
2517 case nir_intrinsic_load_mesh_view_count:
2518 return SYSTEM_VALUE_MESH_VIEW_COUNT;
2519 case nir_intrinsic_load_fully_covered:
2520 return SYSTEM_VALUE_FULLY_COVERED;
2521 case nir_intrinsic_load_frag_size:
2522 return SYSTEM_VALUE_FRAG_SIZE;
2523 case nir_intrinsic_load_frag_invocation_count:
2524 return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2525 case nir_intrinsic_load_shader_index:
2526 return SYSTEM_VALUE_SHADER_INDEX;
2527 case nir_intrinsic_load_coalesced_input_count:
2528 return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2529 case nir_intrinsic_load_warps_per_sm_nv:
2530 return SYSTEM_VALUE_WARPS_PER_SM_NV;
2531 case nir_intrinsic_load_sm_count_nv:
2532 return SYSTEM_VALUE_SM_COUNT_NV;
2533 case nir_intrinsic_load_warp_id_nv:
2534 return SYSTEM_VALUE_WARP_ID_NV;
2535 case nir_intrinsic_load_sm_id_nv:
2536 return SYSTEM_VALUE_SM_ID_NV;
2537 default:
2538 unreachable("intrinsic doesn't produce a system value");
2539 }
2540 }
2541
2542 /* OpenGL utility method that remaps attribute locations when the attributes
2543  * are doubles. Not needed for Vulkan due to the difference in input location
2544  * counts for doubles between Vulkan and OpenGL.
2545 *
2546 * The bitfield returned in dual_slot is one bit for each double input slot in
2547 * the original OpenGL single-slot input numbering. The mapping from old
2548 * locations to new locations is as follows:
2549 *
2550 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2551 */
2552 void
2553 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2554 {
2555 assert(shader->info.stage == MESA_SHADER_VERTEX);
2556
2557 *dual_slot = 0;
2558 nir_foreach_shader_in_variable(var, shader) {
2559 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2560 unsigned slots = glsl_count_attribute_slots(var->type, true);
2561 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2562 }
2563 }
2564
2565 nir_foreach_shader_in_variable(var, shader) {
2566 var->data.location +=
2567 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2568 }
2569 }
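/* Worked example (illustrative): with dual_slot = 0b0101 (doubles at original
 * locations 0 and 2), the formula above keeps location 0 in place and maps
 * location 1 -> 1 + util_bitcount(0b0101 & 0b001) = 2,
 * location 2 -> 2 + util_bitcount(0b0101 & 0b011) = 3,
 * location 3 -> 3 + util_bitcount(0b0101 & 0b111) = 5,
 * leaving a gap after each dual-slot attribute for its second slot.
 */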
2570
2571 /* Returns an attribute mask that has been re-compacted using the given
2572 * dual_slot mask.
2573 */
2574 uint64_t
2575 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2576 {
2577 while (dual_slot) {
2578 unsigned loc = u_bit_scan64(&dual_slot);
2579 /* mask of all bits up to and including loc */
2580 uint64_t mask = BITFIELD64_MASK(loc + 1);
2581 attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2582 }
2583 return attribs;
2584 }
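/* Worked example (illustrative): with dual_slot = 0b101 and attribs =
 * 0b111111 (six consecutive remapped slots), the loop first collapses the
 * extra slot of the double at location 0 (giving 0b11111) and then the one at
 * location 2 (giving 0b1111), i.e. four original single-slot locations.
 */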
2585
2586 void
2587 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2588 bool bindless)
2589 {
2590 enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2591
2592 /* Image intrinsics only have one of these */
2593 assert(!nir_intrinsic_has_src_type(intrin) ||
2594 !nir_intrinsic_has_dest_type(intrin));
2595
2596 nir_alu_type data_type = nir_type_invalid;
2597 if (nir_intrinsic_has_src_type(intrin))
2598 data_type = nir_intrinsic_src_type(intrin);
2599 if (nir_intrinsic_has_dest_type(intrin))
2600 data_type = nir_intrinsic_dest_type(intrin);
2601
2602 nir_atomic_op atomic_op = 0;
2603 if (nir_intrinsic_has_atomic_op(intrin))
2604 atomic_op = nir_intrinsic_atomic_op(intrin);
2605
2606 switch (intrin->intrinsic) {
2607 #define CASE(op) \
2608 case nir_intrinsic_image_deref_##op: \
2609 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2610 : nir_intrinsic_image_##op; \
2611 break;
2612 CASE(load)
2613 CASE(sparse_load)
2614 CASE(store)
2615 CASE(atomic)
2616 CASE(atomic_swap)
2617 CASE(size)
2618 CASE(samples)
2619 CASE(load_raw_intel)
2620 CASE(store_raw_intel)
2621 CASE(fragment_mask_load_amd)
2622 CASE(store_block_agx)
2623 #undef CASE
2624 default:
2625       unreachable("Unhandled image intrinsic");
2626 }
2627
2628 nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2629
2630 /* Only update the format if the intrinsic doesn't have one set */
2631 if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2632 nir_intrinsic_set_format(intrin, var->data.image.format);
2633
2634 nir_intrinsic_set_access(intrin, access | var->data.access);
2635 if (nir_intrinsic_has_src_type(intrin))
2636 nir_intrinsic_set_src_type(intrin, data_type);
2637 if (nir_intrinsic_has_dest_type(intrin))
2638 nir_intrinsic_set_dest_type(intrin, data_type);
2639
2640 if (nir_intrinsic_has_atomic_op(intrin))
2641 nir_intrinsic_set_atomic_op(intrin, atomic_op);
2642
2643 nir_src_rewrite(&intrin->src[0], src);
2644 }
2645
2646 unsigned
2647 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2648 {
2649 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2650 int coords = glsl_get_sampler_dim_coordinate_components(dim);
2651 if (dim == GLSL_SAMPLER_DIM_CUBE)
2652 return coords;
2653 else
2654 return coords + nir_intrinsic_image_array(instr);
2655 }
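/* For example (illustrative): a 2D array image yields 2 + 1 = 3 coordinate
 * components, while GLSL_SAMPLER_DIM_CUBE returns the base 3 components and
 * deliberately does not add one for the array dimension.
 */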
2656
2657 nir_src *
2658 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2659 {
2660 switch (call->intrinsic) {
2661 case nir_intrinsic_trace_ray:
2662 case nir_intrinsic_rt_trace_ray:
2663 return &call->src[10];
2664 case nir_intrinsic_execute_callable:
2665 case nir_intrinsic_rt_execute_callable:
2666 return &call->src[1];
2667 default:
2668 unreachable("Not a call intrinsic");
2669 return NULL;
2670 }
2671 }
2672
2673 nir_binding
2674 nir_chase_binding(nir_src rsrc)
2675 {
2676 nir_binding res = { 0 };
2677 if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2678 const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2679 bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2680 while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2681 nir_deref_instr *deref = nir_src_as_deref(rsrc);
2682
2683 if (deref->deref_type == nir_deref_type_var) {
2684 res.success = true;
2685 res.var = deref->var;
2686 res.desc_set = deref->var->data.descriptor_set;
2687 res.binding = deref->var->data.binding;
2688 return res;
2689 } else if (deref->deref_type == nir_deref_type_array && is_image) {
2690 if (res.num_indices == ARRAY_SIZE(res.indices))
2691 return (nir_binding){ 0 };
2692 res.indices[res.num_indices++] = deref->arr.index;
2693 }
2694
2695 rsrc = deref->parent;
2696 }
2697 }
2698
2699 /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2700 * when removing the offset from addresses. We also consider
2701 * nir_op_is_vec_or_mov() instructions to skip trimming of
2702 * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2703 */
2704 unsigned num_components = nir_src_num_components(rsrc);
2705 while (true) {
2706 nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2707 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2708 if (alu && alu->op == nir_op_mov) {
2709 for (unsigned i = 0; i < num_components; i++) {
2710 if (alu->src[0].swizzle[i] != i)
2711 return (nir_binding){ 0 };
2712 }
2713 rsrc = alu->src[0].src;
2714 } else if (alu && nir_op_is_vec(alu->op)) {
2715 for (unsigned i = 0; i < num_components; i++) {
2716 if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2717 return (nir_binding){ 0 };
2718 }
2719 rsrc = alu->src[0].src;
2720 } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2721       /* The caller might want to be aware that only the first invocation's
2722        * value of the indices is used.
2723 */
2724 res.read_first_invocation = true;
2725 rsrc = intrin->src[0];
2726 } else {
2727 break;
2728 }
2729 }
2730
2731 if (nir_src_is_const(rsrc)) {
2732 /* GL binding model after deref lowering */
2733 res.success = true;
2734 /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2735 * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2736 * example) but others just keep it around as a vec2 (v3dv).
2737 */
2738 res.binding = nir_src_comp_as_uint(rsrc, 0);
2739 return res;
2740 }
2741
2742 /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2743
2744 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2745 if (!intrin)
2746 return (nir_binding){ 0 };
2747
2748 /* Intel resource, similar to load_vulkan_descriptor after it has been
2749 * lowered.
2750 */
2751 if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2752 res.success = true;
2753 res.desc_set = nir_intrinsic_desc_set(intrin);
2754 res.binding = nir_intrinsic_binding(intrin);
2755 /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2756        * src[1]; it is kept around for other purposes.
2757 */
2758 res.num_indices = 2;
2759 res.indices[0] = intrin->src[0];
2760 res.indices[1] = intrin->src[1];
2761 return res;
2762 }
2763
2764 /* skip load_vulkan_descriptor */
2765 if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2766 intrin = nir_src_as_intrinsic(intrin->src[0]);
2767 if (!intrin)
2768 return (nir_binding){ 0 };
2769 }
2770
2771 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2772 return (nir_binding){ 0 };
2773
2774 assert(res.num_indices == 0);
2775 res.success = true;
2776 res.desc_set = nir_intrinsic_desc_set(intrin);
2777 res.binding = nir_intrinsic_binding(intrin);
2778 res.num_indices = 1;
2779 res.indices[0] = intrin->src[0];
2780 return res;
2781 }
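/* Usage sketch (illustrative): a backend that needs to know which descriptor
 * an SSBO/UBO access refers to can chase the resource source of the access:
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    if (binding.success)
 *       lookup_descriptor(binding.desc_set, binding.binding);
 *
 * where lookup_descriptor() stands in for whatever the driver does with the
 * descriptor set/binding pair; binding.indices[] hold any array indices and
 * binding.var is set for the deref-based (GL) path.
 */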
2782
2783 nir_variable *
2784 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2785 {
2786 nir_variable *binding_var = NULL;
2787 unsigned count = 0;
2788
2789 if (!binding.success)
2790 return NULL;
2791
2792 if (binding.var)
2793 return binding.var;
2794
2795 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2796 if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2797 binding_var = var;
2798 count++;
2799 }
2800 }
2801
2802 /* Be conservative if another variable is using the same binding/desc_set
2803 * because the access mask might be different and we can't get it reliably.
2804 */
2805 if (count > 1)
2806 return NULL;
2807
2808 return binding_var;
2809 }
2810
2811 nir_scalar
2812 nir_scalar_chase_movs(nir_scalar s)
2813 {
2814 while (nir_scalar_is_alu(s)) {
2815 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2816 if (alu->op == nir_op_mov) {
2817 s.def = alu->src[0].src.ssa;
2818 s.comp = alu->src[0].swizzle[s.comp];
2819 } else if (nir_op_is_vec(alu->op)) {
2820 s.def = alu->src[s.comp].src.ssa;
2821 s.comp = alu->src[s.comp].swizzle[0];
2822 } else {
2823 break;
2824 }
2825 }
2826
2827 return s;
2828 }
2829
2830 nir_alu_type
2831 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2832 {
2833 switch (base_type) {
2834 /* clang-format off */
2835 case GLSL_TYPE_BOOL: return nir_type_bool1;
2836 case GLSL_TYPE_UINT: return nir_type_uint32;
2837 case GLSL_TYPE_INT: return nir_type_int32;
2838 case GLSL_TYPE_UINT16: return nir_type_uint16;
2839 case GLSL_TYPE_INT16: return nir_type_int16;
2840 case GLSL_TYPE_UINT8: return nir_type_uint8;
2841 case GLSL_TYPE_INT8: return nir_type_int8;
2842 case GLSL_TYPE_UINT64: return nir_type_uint64;
2843 case GLSL_TYPE_INT64: return nir_type_int64;
2844 case GLSL_TYPE_FLOAT: return nir_type_float32;
2845 case GLSL_TYPE_FLOAT16: return nir_type_float16;
2846 case GLSL_TYPE_DOUBLE: return nir_type_float64;
2847 /* clang-format on */
2848
2849 case GLSL_TYPE_COOPERATIVE_MATRIX:
2850 case GLSL_TYPE_SAMPLER:
2851 case GLSL_TYPE_TEXTURE:
2852 case GLSL_TYPE_IMAGE:
2853 case GLSL_TYPE_ATOMIC_UINT:
2854 case GLSL_TYPE_STRUCT:
2855 case GLSL_TYPE_INTERFACE:
2856 case GLSL_TYPE_ARRAY:
2857 case GLSL_TYPE_VOID:
2858 case GLSL_TYPE_SUBROUTINE:
2859 case GLSL_TYPE_ERROR:
2860 return nir_type_invalid;
2861 }
2862
2863 unreachable("unknown type");
2864 }
2865
2866 enum glsl_base_type
2867 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2868 {
2869 /* clang-format off */
2870 switch (base_type) {
2871 case nir_type_bool1: return GLSL_TYPE_BOOL;
2872 case nir_type_uint32: return GLSL_TYPE_UINT;
2873 case nir_type_int32: return GLSL_TYPE_INT;
2874 case nir_type_uint16: return GLSL_TYPE_UINT16;
2875 case nir_type_int16: return GLSL_TYPE_INT16;
2876 case nir_type_uint8: return GLSL_TYPE_UINT8;
2877 case nir_type_int8: return GLSL_TYPE_INT8;
2878 case nir_type_uint64: return GLSL_TYPE_UINT64;
2879 case nir_type_int64: return GLSL_TYPE_INT64;
2880 case nir_type_float32: return GLSL_TYPE_FLOAT;
2881 case nir_type_float16: return GLSL_TYPE_FLOAT16;
2882 case nir_type_float64: return GLSL_TYPE_DOUBLE;
2883 default: unreachable("Not a sized nir_alu_type");
2884 }
2885 /* clang-format on */
2886 }
2887
2888 nir_op
2889 nir_op_vec(unsigned num_components)
2890 {
2891 /* clang-format off */
2892 switch (num_components) {
2893 case 1: return nir_op_mov;
2894 case 2: return nir_op_vec2;
2895 case 3: return nir_op_vec3;
2896 case 4: return nir_op_vec4;
2897 case 5: return nir_op_vec5;
2898 case 8: return nir_op_vec8;
2899 case 16: return nir_op_vec16;
2900 default: unreachable("bad component count");
2901 }
2902 /* clang-format on */
2903 }
2904
2905 bool
2906 nir_op_is_vec(nir_op op)
2907 {
2908 switch (op) {
2909 case nir_op_vec2:
2910 case nir_op_vec3:
2911 case nir_op_vec4:
2912 case nir_op_vec5:
2913 case nir_op_vec8:
2914 case nir_op_vec16:
2915 return true;
2916 default:
2917 return false;
2918 }
2919 }
2920
2921 nir_component_mask_t
2922 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
2923 {
2924 nir_component_mask_t read_mask = 0;
2925 for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
2926 if (!nir_alu_instr_channel_used(instr, src, c))
2927 continue;
2928
2929 read_mask |= (1 << instr->src[src].swizzle[c]);
2930 }
2931 return read_mask;
2932 }
2933
2934 unsigned
2935 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
2936 {
2937 if (nir_op_infos[instr->op].input_sizes[src] > 0)
2938 return nir_op_infos[instr->op].input_sizes[src];
2939
2940 return instr->def.num_components;
2941 }
2942
2943 #define CASE_ALL_SIZES(op) \
2944 case op: \
2945 case op##8: \
2946 case op##16: \
2947 case op##32:
2948
2949 bool
2950 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
2951 {
2952 switch (instr->op) {
2953 CASE_ALL_SIZES(nir_op_flt)
2954 CASE_ALL_SIZES(nir_op_fge)
2955 CASE_ALL_SIZES(nir_op_fltu)
2956 CASE_ALL_SIZES(nir_op_fgeu)
2957 CASE_ALL_SIZES(nir_op_feq)
2958 CASE_ALL_SIZES(nir_op_fneu)
2959 CASE_ALL_SIZES(nir_op_fequ)
2960 CASE_ALL_SIZES(nir_op_fneo)
2961 CASE_ALL_SIZES(nir_op_funord)
2962 CASE_ALL_SIZES(nir_op_ford)
2963 CASE_ALL_SIZES(nir_op_ilt)
2964 CASE_ALL_SIZES(nir_op_ult)
2965 CASE_ALL_SIZES(nir_op_ige)
2966 CASE_ALL_SIZES(nir_op_uge)
2967 CASE_ALL_SIZES(nir_op_ieq)
2968 CASE_ALL_SIZES(nir_op_ine)
2969 CASE_ALL_SIZES(nir_op_bitz)
2970 CASE_ALL_SIZES(nir_op_bitnz)
2971 case nir_op_inot:
2972 return true;
2973 default:
2974 return false;
2975 }
2976 }
2977
2978 #undef CASE_ALL_SIZES
2979
2980 unsigned
2981 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
2982 {
2983 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2984 assert(srcn < info->num_srcs);
2985 if (info->src_components[srcn] > 0)
2986 return info->src_components[srcn];
2987 else if (info->src_components[srcn] == 0)
2988 return intr->num_components;
2989 else
2990 return nir_src_num_components(intr->src[srcn]);
2991 }
2992
2993 unsigned
2994 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
2995 {
2996 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2997 if (!info->has_dest)
2998 return 0;
2999 else if (info->dest_components)
3000 return info->dest_components;
3001 else
3002 return intr->num_components;
3003 }
3004
3005 nir_alu_type
3006 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
3007 {
3008 /* We could go nuts here, but we'll just handle a few simple
3009 * cases and let everything else be untyped.
3010 */
3011 switch (intrin->intrinsic) {
3012 case nir_intrinsic_store_deref: {
3013 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3014 if (src == 1)
3015 return nir_get_nir_type_for_glsl_type(deref->type);
3016 break;
3017 }
3018
3019 case nir_intrinsic_store_output:
3020 if (src == 0)
3021 return nir_intrinsic_src_type(intrin);
3022 break;
3023
3024 default:
3025 break;
3026 }
3027
3028 /* For the most part, we leave other intrinsics alone. Most
3029 * of them don't matter in OpenGL ES 2.0 drivers anyway.
3030 * However, we should at least check if this is some sort of
3031    * IO intrinsic and flag its offset and index sources.
3032 */
3033 {
3034 int offset_src_idx = nir_get_io_offset_src_number(intrin);
3035 if (src == offset_src_idx) {
3036 const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
3037 if (offset_src)
3038 return nir_type_int;
3039 }
3040 }
3041
3042 return nir_type_invalid;
3043 }
3044
3045 nir_alu_type
3046 nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
3047 {
3048 /* We could go nuts here, but we'll just handle a few simple
3049 * cases and let everything else be untyped.
3050 */
3051 switch (intrin->intrinsic) {
3052 case nir_intrinsic_load_deref: {
3053 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3054 return nir_get_nir_type_for_glsl_type(deref->type);
3055 }
3056
3057 case nir_intrinsic_load_input:
3058 case nir_intrinsic_load_per_primitive_input:
3059 case nir_intrinsic_load_uniform:
3060 return nir_intrinsic_dest_type(intrin);
3061
3062 default:
3063 break;
3064 }
3065
3066 return nir_type_invalid;
3067 }
3068
3069 /**
3070 * Helper to copy const_index[] from src to dst, without assuming they
3071 * match in order.
3072 */
3073 void
3074 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
3075 {
3076 if (src->intrinsic == dst->intrinsic) {
3077 memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
3078 return;
3079 }
3080
3081 const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
3082 const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
3083
3084 for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
3085 if (src_info->index_map[i] == 0)
3086 continue;
3087
3088 /* require that dst instruction also uses the same const_index[]: */
3089 assert(dst_info->index_map[i] > 0);
3090
3091 dst->const_index[dst_info->index_map[i] - 1] =
3092 src->const_index[src_info->index_map[i] - 1];
3093 }
3094 }
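/* Usage sketch (illustrative; the two opcodes are only an example of
 * intrinsics that share their indices): a pass that rewrites, say, a
 * load_input into a load_per_vertex_input can carry the shared indices over
 * without knowing their positions in const_index[]:
 *
 *    nir_intrinsic_instr *new_intr =
 *       nir_intrinsic_instr_create(b->shader,
 *                                  nir_intrinsic_load_per_vertex_input);
 *    nir_intrinsic_copy_const_indices(new_intr, old_intr);
 */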
3095
3096 bool
3097 nir_tex_instr_need_sampler(const nir_tex_instr *instr)
3098 {
3099 switch (instr->op) {
3100 case nir_texop_txf:
3101 case nir_texop_txf_ms:
3102 case nir_texop_txs:
3103 case nir_texop_query_levels:
3104 case nir_texop_texture_samples:
3105 case nir_texop_samples_identical:
3106 case nir_texop_descriptor_amd:
3107 return false;
3108 default:
3109 return true;
3110 }
3111 }
3112
3113 unsigned
3114 nir_tex_instr_result_size(const nir_tex_instr *instr)
3115 {
3116 switch (instr->op) {
3117 case nir_texop_txs: {
3118 unsigned ret;
3119 switch (instr->sampler_dim) {
3120 case GLSL_SAMPLER_DIM_1D:
3121 case GLSL_SAMPLER_DIM_BUF:
3122 ret = 1;
3123 break;
3124 case GLSL_SAMPLER_DIM_2D:
3125 case GLSL_SAMPLER_DIM_CUBE:
3126 case GLSL_SAMPLER_DIM_MS:
3127 case GLSL_SAMPLER_DIM_RECT:
3128 case GLSL_SAMPLER_DIM_EXTERNAL:
3129 case GLSL_SAMPLER_DIM_SUBPASS:
3130 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3131 ret = 2;
3132 break;
3133 case GLSL_SAMPLER_DIM_3D:
3134 ret = 3;
3135 break;
3136 default:
3137 unreachable("not reached");
3138 }
3139 if (instr->is_array)
3140 ret++;
3141 return ret;
3142 }
3143
3144 case nir_texop_lod:
3145 return 2;
3146
3147 case nir_texop_texture_samples:
3148 case nir_texop_query_levels:
3149 case nir_texop_samples_identical:
3150 case nir_texop_fragment_mask_fetch_amd:
3151 case nir_texop_lod_bias_agx:
3152 case nir_texop_has_custom_border_color_agx:
3153 return 1;
3154
3155 case nir_texop_descriptor_amd:
3156 return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
3157
3158 case nir_texop_sampler_descriptor_amd:
3159 return 4;
3160
3161 case nir_texop_hdr_dim_nv:
3162 case nir_texop_tex_type_nv:
3163 return 4;
3164
3165 case nir_texop_custom_border_color_agx:
3166 return 4;
3167
3168 default:
3169 if (instr->is_shadow && instr->is_new_style_shadow)
3170 return 1;
3171
3172 return 4;
3173 }
3174 }
3175
3176 bool
3177 nir_tex_instr_is_query(const nir_tex_instr *instr)
3178 {
3179 switch (instr->op) {
3180 case nir_texop_txs:
3181 case nir_texop_lod:
3182 case nir_texop_texture_samples:
3183 case nir_texop_query_levels:
3184 case nir_texop_descriptor_amd:
3185 case nir_texop_sampler_descriptor_amd:
3186 case nir_texop_lod_bias_agx:
3187 case nir_texop_custom_border_color_agx:
3188 case nir_texop_has_custom_border_color_agx:
3189 case nir_texop_hdr_dim_nv:
3190 case nir_texop_tex_type_nv:
3191 return true;
3192 case nir_texop_tex:
3193 case nir_texop_txb:
3194 case nir_texop_txl:
3195 case nir_texop_txd:
3196 case nir_texop_txf:
3197 case nir_texop_txf_ms:
3198 case nir_texop_txf_ms_fb:
3199 case nir_texop_txf_ms_mcs_intel:
3200 case nir_texop_tg4:
3201 case nir_texop_samples_identical:
3202 case nir_texop_fragment_mask_fetch_amd:
3203 case nir_texop_fragment_fetch_amd:
3204 return false;
3205 default:
3206 unreachable("Invalid texture opcode");
3207 }
3208 }
3209
3210 bool
3211 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
3212 {
3213 switch (instr->op) {
3214 case nir_texop_tex:
3215 case nir_texop_txb:
3216 case nir_texop_lod:
3217 return true;
3218 case nir_texop_tg4:
3219 return instr->is_gather_implicit_lod;
3220 default:
3221 return false;
3222 }
3223 }
3224
3225 nir_alu_type
3226 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
3227 {
3228 switch (instr->src[src].src_type) {
3229 case nir_tex_src_coord:
3230 switch (instr->op) {
3231 case nir_texop_txf:
3232 case nir_texop_txf_ms:
3233 case nir_texop_txf_ms_fb:
3234 case nir_texop_txf_ms_mcs_intel:
3235 case nir_texop_samples_identical:
3236 case nir_texop_fragment_fetch_amd:
3237 case nir_texop_fragment_mask_fetch_amd:
3238 return nir_type_int;
3239
3240 default:
3241 return nir_type_float;
3242 }
3243
3244 case nir_tex_src_lod:
3245 switch (instr->op) {
3246 case nir_texop_txs:
3247 case nir_texop_txf:
3248 case nir_texop_txf_ms:
3249 case nir_texop_fragment_fetch_amd:
3250 case nir_texop_fragment_mask_fetch_amd:
3251 return nir_type_int;
3252
3253 default:
3254 return nir_type_float;
3255 }
3256
3257 case nir_tex_src_projector:
3258 case nir_tex_src_comparator:
3259 case nir_tex_src_bias:
3260 case nir_tex_src_min_lod:
3261 case nir_tex_src_ddx:
3262 case nir_tex_src_ddy:
3263 case nir_tex_src_backend1:
3264 case nir_tex_src_backend2:
3265 return nir_type_float;
3266
3267 case nir_tex_src_offset:
3268 case nir_tex_src_ms_index:
3269 case nir_tex_src_plane:
3270 return nir_type_int;
3271
3272 case nir_tex_src_sampler_deref_intrinsic:
3273 case nir_tex_src_texture_deref_intrinsic:
3274 case nir_tex_src_ms_mcs_intel:
3275 case nir_tex_src_texture_deref:
3276 case nir_tex_src_sampler_deref:
3277 case nir_tex_src_texture_offset:
3278 case nir_tex_src_sampler_offset:
3279 case nir_tex_src_texture_handle:
3280 case nir_tex_src_sampler_handle:
3281 return nir_type_uint;
3282
3283 case nir_num_tex_src_types:
3284 unreachable("nir_num_tex_src_types is not a valid source type");
3285 }
3286
3287 unreachable("Invalid texture source type");
3288 }
3289
3290 unsigned
3291 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
3292 {
3293 if (instr->src[src].src_type == nir_tex_src_coord)
3294 return instr->coord_components;
3295
3296 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
3297 if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
3298 return 4;
3299
3300 if (instr->src[src].src_type == nir_tex_src_ddx ||
3301 instr->src[src].src_type == nir_tex_src_ddy) {
3302
3303 if (instr->is_array && !instr->array_is_lowered_cube)
3304 return instr->coord_components - 1;
3305 else
3306 return instr->coord_components;
3307 }
3308
3309 if (instr->src[src].src_type == nir_tex_src_offset) {
3310 if (instr->is_array)
3311 return instr->coord_components - 1;
3312 else
3313 return instr->coord_components;
3314 }
3315
3316 if (instr->src[src].src_type == nir_tex_src_backend1 ||
3317 instr->src[src].src_type == nir_tex_src_backend2)
3318 return nir_src_num_components(instr->src[src].src);
3319
3320 /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
3321 if (instr->src[src].src_type == nir_tex_src_texture_handle ||
3322 instr->src[src].src_type == nir_tex_src_sampler_handle)
3323 return 0;
3324
3325 return 1;
3326 }
3327
3328 /**
3329 * Return which components are written into transform feedback buffers.
3330 * The result is relative to 0, not "component".
3331 */
3332 unsigned
3333 nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
3334 {
3335 unsigned mask = 0;
3336
3337 if (nir_intrinsic_has_io_xfb(instr)) {
3338 unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
3339 assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */
3340
3341 unsigned iter_mask = wr_mask;
3342 while (iter_mask) {
3343 unsigned i = u_bit_scan(&iter_mask);
3344 nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
3345 if (xfb.out[i % 2].num_components)
3346 mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
3347 }
3348 }
3349
3350 return mask;
3351 }
3352
3353 /**
3354 * Whether an output slot is consumed by fixed-function logic.
3355 */
3356 bool
3357 nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
3358 {
3359 switch (next_shader) {
3360 case MESA_SHADER_FRAGMENT:
3361 return slot == VARYING_SLOT_POS ||
3362 slot == VARYING_SLOT_PSIZ ||
3363 slot == VARYING_SLOT_EDGE ||
3364 slot == VARYING_SLOT_CLIP_VERTEX ||
3365 slot == VARYING_SLOT_CLIP_DIST0 ||
3366 slot == VARYING_SLOT_CLIP_DIST1 ||
3367 slot == VARYING_SLOT_CULL_DIST0 ||
3368 slot == VARYING_SLOT_CULL_DIST1 ||
3369 slot == VARYING_SLOT_LAYER ||
3370 slot == VARYING_SLOT_VIEWPORT ||
3371 slot == VARYING_SLOT_VIEW_INDEX ||
3372 slot == VARYING_SLOT_VIEWPORT_MASK ||
3373 slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
3374 /* NV_mesh_shader_only */
3375 slot == VARYING_SLOT_PRIMITIVE_COUNT ||
3376 slot == VARYING_SLOT_PRIMITIVE_INDICES;
3377
3378 case MESA_SHADER_TESS_EVAL:
3379 return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3380 slot == VARYING_SLOT_TESS_LEVEL_INNER ||
3381 slot == VARYING_SLOT_BOUNDING_BOX0 ||
3382 slot == VARYING_SLOT_BOUNDING_BOX1;
3383
3384 case MESA_SHADER_MESH:
3385 /* NV_mesh_shader only */
3386 return slot == VARYING_SLOT_TASK_COUNT;
3387
3388 case MESA_SHADER_NONE:
3389 /* NONE means unknown. Check all possibilities. */
3390 return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
3391 nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
3392 nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);
3393
3394 default:
3395 /* No other shaders have preceding shaders with sysval outputs. */
3396 return false;
3397 }
3398 }
3399
3400 /**
3401 * Whether an input/output slot is consumed by the next shader stage,
3402 * or written by the previous shader stage.
3403 */
3404 bool
3405 nir_slot_is_varying(gl_varying_slot slot)
3406 {
3407 return slot >= VARYING_SLOT_VAR0 ||
3408 slot == VARYING_SLOT_COL0 ||
3409 slot == VARYING_SLOT_COL1 ||
3410 slot == VARYING_SLOT_BFC0 ||
3411 slot == VARYING_SLOT_BFC1 ||
3412 slot == VARYING_SLOT_FOGC ||
3413 (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
3414 slot == VARYING_SLOT_PNTC ||
3415 slot == VARYING_SLOT_CLIP_DIST0 ||
3416 slot == VARYING_SLOT_CLIP_DIST1 ||
3417 slot == VARYING_SLOT_CULL_DIST0 ||
3418 slot == VARYING_SLOT_CULL_DIST1 ||
3419 slot == VARYING_SLOT_PRIMITIVE_ID ||
3420 slot == VARYING_SLOT_LAYER ||
3421 slot == VARYING_SLOT_VIEWPORT ||
3422 slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3423 slot == VARYING_SLOT_TESS_LEVEL_INNER;
3424 }
3425
3426 bool
3427 nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
3428 gl_shader_stage next_shader)
3429 {
3430 return nir_slot_is_sysval_output(slot, next_shader) &&
3431 nir_slot_is_varying(slot);
3432 }
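/* A minimal illustration of how the three predicates above relate, for a
 * hypothetical driver linking a vertex shader against a fragment shader: an
 * output slot may be consumed only by the next stage (a pure varying), only
 * by fixed-function hardware (a pure sysval output), or by both. The enum
 * and helper below are hypothetical and exist only for this sketch.
 */
enum example_output_kind {
   EXAMPLE_OUTPUT_NONE,
   EXAMPLE_OUTPUT_VARYING_ONLY,
   EXAMPLE_OUTPUT_SYSVAL_ONLY,
   EXAMPLE_OUTPUT_BOTH,
};

UNUSED static enum example_output_kind
example_classify_vs_output(gl_varying_slot slot)
{
   if (nir_slot_is_sysval_output_and_varying(slot, MESA_SHADER_FRAGMENT))
      return EXAMPLE_OUTPUT_BOTH;
   if (nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT))
      return EXAMPLE_OUTPUT_SYSVAL_ONLY;
   if (nir_slot_is_varying(slot))
      return EXAMPLE_OUTPUT_VARYING_ONLY;
   return EXAMPLE_OUTPUT_NONE;
}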
3433
3434 /**
3435 * This marks the output store instruction as not feeding the next shader
3436 * stage. If the instruction has no other use, it's removed.
3437 */
3438 bool
3439 nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
3440 {
3441 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3442
3443 if ((!sem.no_sysval_output &&
3444 nir_slot_is_sysval_output(sem.location, next_shader)) ||
3445 nir_instr_xfb_write_mask(intr)) {
3446 /* Demote the store instruction. */
3447 sem.no_varying = true;
3448 nir_intrinsic_set_io_semantics(intr, sem);
3449 return false;
3450 } else {
3451 nir_instr_remove(&intr->instr);
3452 return true;
3453 }
3454 }
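/* A minimal sketch of how a linker might use nir_remove_varying() (not part
 * of this file's API), assuming I/O has been lowered to store_output
 * intrinsics: for every output store whose location the consumer does not
 * read, either demote the store to a sysval/XFB-only write or delete it.
 * "inputs_read" is assumed to be a bitmask of gl_varying_slot values
 * gathered from the next-stage shader; the helper name is hypothetical.
 */
UNUSED static bool
example_trim_unread_outputs(nir_shader *producer, uint64_t inputs_read,
                            gl_shader_stage next_shader)
{
   bool progress = false;

   nir_foreach_block(block, nir_shader_get_entrypoint(producer)) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_store_output)
            continue;

         unsigned loc = nir_intrinsic_io_semantics(intr).location;
         if (loc < 64 && !(inputs_read & BITFIELD64_BIT(loc))) {
            /* Returns true only if the store was deleted outright. */
            if (nir_remove_varying(intr, next_shader))
               progress = true;
         }
      }
   }

   return progress;
}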
3455
3456 /**
3457 * This marks the output store instruction as not feeding fixed-function
3458 * logic. If the instruction has no other use, it's removed.
3459 */
3460 bool
3461 nir_remove_sysval_output(nir_intrinsic_instr *intr)
3462 {
3463 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3464
3465 if ((!sem.no_varying && nir_slot_is_varying(sem.location)) ||
3466 nir_instr_xfb_write_mask(intr)) {
3467 /* Demote the store instruction. */
3468 sem.no_sysval_output = true;
3469 nir_intrinsic_set_io_semantics(intr, sem);
3470 return false;
3471 } else {
3472 nir_instr_remove(&intr->instr);
3473 return true;
3474 }
3475 }
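/* Note that the two helpers above are complementary: once a store has been
 * demoted on one side (no_varying or no_sysval_output set) and it performs
 * no transform-feedback writes, a later call to the other helper removes it
 * entirely.
 */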
3476
3477 void
3478 nir_remove_non_entrypoints(nir_shader *nir)
3479 {
3480 nir_foreach_function_safe(func, nir) {
3481 if (!func->is_entrypoint)
3482 exec_node_remove(&func->node);
3483 }
3484 assert(exec_list_length(&nir->functions) == 1);
3485 }
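/* A minimal sketch of the usual call site (illustration only; real pass
 * ordering varies by driver and usually involves additional lowering before
 * inlining): once every call has been inlined into the entrypoint, the
 * remaining function definitions are dead and can be dropped. The helper
 * name is hypothetical.
 */
UNUSED static void
example_inline_and_strip(nir_shader *shader)
{
   nir_inline_functions(shader);
   nir_remove_non_entrypoints(shader);
}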
3486
3487 void
3488 nir_remove_non_exported(nir_shader *nir)
3489 {
3490 nir_foreach_function_safe(func, nir) {
3491 if (!func->is_exported)
3492 exec_node_remove(&func->node);
3493 }
3494 }
3495
3496 unsigned
3497 nir_static_workgroup_size(const nir_shader *s)
3498 {
3499 return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
3500 s->info.workgroup_size[2];
3501 }
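/* A minimal usage sketch (illustration only): the product above is only
 * meaningful when the workgroup size is known at compile time, so callers
 * typically check info.workgroup_size_variable first. The helper name and
 * the fallback parameter are hypothetical.
 */
UNUSED static unsigned
example_threads_per_workgroup(const nir_shader *s, unsigned fallback)
{
   return s->info.workgroup_size_variable ? fallback
                                          : nir_static_workgroup_size(s);
}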
3502
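/**
 * Whether the block does any "real" work, i.e. it is followed by further
 * control flow in its containing list, or it contains any instruction other
 * than a phi or a vec/mov ALU operation.
 */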
3503 bool
3504 nir_block_contains_work(nir_block *block)
3505 {
3506 if (!nir_cf_node_is_last(&block->cf_node))
3507 return true;
3508
3509 nir_foreach_instr(instr, block) {
3510 if (instr->type == nir_instr_type_phi)
3511 continue;
3512 if (instr->type != nir_instr_type_alu ||
3513 !nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
3514 return true;
3515 }
3516
3517 return false;
3518 }
3519