xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott ([email protected])
 *
 */

#include "nir.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include "util/half_float.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_qsort.h"
#include "nir_builder.h"
#include "nir_control_flow_private.h"
#include "nir_worklist.h"

#ifndef NDEBUG
uint32_t nir_debug = 0;
bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };

static const struct debug_named_value nir_debug_control[] = {
   { "clone", NIR_DEBUG_CLONE,
     "Test cloning a shader at each successful lowering/optimization call" },
   { "serialize", NIR_DEBUG_SERIALIZE,
     "Test serialize and deserialize shader at each successful lowering/optimization call" },
   { "novalidate", NIR_DEBUG_NOVALIDATE,
     "Disable shader validation at each successful lowering/optimization call" },
   { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
     "Validate SSA dominance in shader at each successful lowering/optimization call" },
   { "tgsi", NIR_DEBUG_TGSI,
     "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
   { "print", NIR_DEBUG_PRINT,
     "Dump resulting shader after each successful lowering/optimization call" },
   { "print_vs", NIR_DEBUG_PRINT_VS,
     "Dump resulting vertex shader after each successful lowering/optimization call" },
   { "print_tcs", NIR_DEBUG_PRINT_TCS,
     "Dump resulting tessellation control shader after each successful lowering/optimization call" },
   { "print_tes", NIR_DEBUG_PRINT_TES,
     "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
   { "print_gs", NIR_DEBUG_PRINT_GS,
     "Dump resulting geometry shader after each successful lowering/optimization call" },
   { "print_fs", NIR_DEBUG_PRINT_FS,
     "Dump resulting fragment shader after each successful lowering/optimization call" },
   { "print_cs", NIR_DEBUG_PRINT_CS,
     "Dump resulting compute shader after each successful lowering/optimization call" },
   { "print_ts", NIR_DEBUG_PRINT_TS,
     "Dump resulting task shader after each successful lowering/optimization call" },
   { "print_ms", NIR_DEBUG_PRINT_MS,
     "Dump resulting mesh shader after each successful lowering/optimization call" },
   { "print_rgs", NIR_DEBUG_PRINT_RGS,
     "Dump resulting raygen shader after each successful lowering/optimization call" },
   { "print_ahs", NIR_DEBUG_PRINT_AHS,
     "Dump resulting any-hit shader after each successful lowering/optimization call" },
   { "print_chs", NIR_DEBUG_PRINT_CHS,
     "Dump resulting closest-hit shader after each successful lowering/optimization call" },
   { "print_mhs", NIR_DEBUG_PRINT_MHS,
     "Dump resulting miss shader after each successful lowering/optimization call" },
   { "print_is", NIR_DEBUG_PRINT_IS,
     "Dump resulting intersection shader after each successful lowering/optimization call" },
   { "print_cbs", NIR_DEBUG_PRINT_CBS,
     "Dump resulting callable shader after each successful lowering/optimization call" },
   { "print_ks", NIR_DEBUG_PRINT_KS,
     "Dump resulting kernel shader after each successful lowering/optimization call" },
   { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
     "Do not print the const value near each use of a const SSA variable" },
   { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
     "Print shaders even if they are marked as internal" },
   { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
     "Print pass_flags for every instruction when pass_flags are non-zero" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)

static void
nir_process_debug_variable_once(void)
{
   nir_debug = debug_get_option_nir_debug();

   /* clang-format off */
   nir_debug_print_shader[MESA_SHADER_VERTEX]       = NIR_DEBUG(PRINT_VS);
   nir_debug_print_shader[MESA_SHADER_TESS_CTRL]    = NIR_DEBUG(PRINT_TCS);
   nir_debug_print_shader[MESA_SHADER_TESS_EVAL]    = NIR_DEBUG(PRINT_TES);
   nir_debug_print_shader[MESA_SHADER_GEOMETRY]     = NIR_DEBUG(PRINT_GS);
   nir_debug_print_shader[MESA_SHADER_FRAGMENT]     = NIR_DEBUG(PRINT_FS);
   nir_debug_print_shader[MESA_SHADER_COMPUTE]      = NIR_DEBUG(PRINT_CS);
   nir_debug_print_shader[MESA_SHADER_TASK]         = NIR_DEBUG(PRINT_TS);
   nir_debug_print_shader[MESA_SHADER_MESH]         = NIR_DEBUG(PRINT_MS);
   nir_debug_print_shader[MESA_SHADER_RAYGEN]       = NIR_DEBUG(PRINT_RGS);
   nir_debug_print_shader[MESA_SHADER_ANY_HIT]      = NIR_DEBUG(PRINT_AHS);
   nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT]  = NIR_DEBUG(PRINT_CHS);
   nir_debug_print_shader[MESA_SHADER_MISS]         = NIR_DEBUG(PRINT_MHS);
   nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
   nir_debug_print_shader[MESA_SHADER_CALLABLE]     = NIR_DEBUG(PRINT_CBS);
   nir_debug_print_shader[MESA_SHADER_KERNEL]       = NIR_DEBUG(PRINT_KS);
   /* clang-format on */
}

void
nir_process_debug_variable(void)
{
   static once_flag flag = ONCE_FLAG_INIT;
   call_once(&flag, nir_process_debug_variable_once);
}
#endif
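
/* The flags above are parsed from the NIR_DEBUG environment variable as a
 * comma-separated list.  A hedged usage sketch (the application binary is
 * just an illustration):
 *
 *    NIR_DEBUG=print_fs,validate_ssa_dominance ./my_app
 *
 * In-tree code then tests individual flags with the NIR_DEBUG() macro, e.g.:
 *
 *    if (NIR_DEBUG(PRINT_FS))
 *       nir_print_shader(shader, stderr);
 */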

/** Return true if the component mask "mask" with bit size "old_bit_size" can
 * be re-interpreted to be used with "new_bit_size".
 */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
{
   assert(util_is_power_of_two_nonzero(old_bit_size));
   assert(util_is_power_of_two_nonzero(new_bit_size));

   if (old_bit_size == new_bit_size)
      return true;

   if (old_bit_size == 1 || new_bit_size == 1)
      return false;

   if (old_bit_size > new_bit_size) {
      unsigned ratio = old_bit_size / new_bit_size;
      return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
   }

   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start *= old_bit_size;
      count *= old_bit_size;
      if (start % new_bit_size != 0)
         return false;
      if (count % new_bit_size != 0)
         return false;
   }
   return true;
}

/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
{
   assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));

   if (old_bit_size == new_bit_size)
      return mask;

   nir_component_mask_t new_mask = 0;
   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start = start * old_bit_size / new_bit_size;
      count = count * old_bit_size / new_bit_size;
      new_mask |= BITFIELD_RANGE(start, count);
   }
   return new_mask;
}
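
/* Worked example: reinterpreting the mask 0b0011 (components x and y) from a
 * 32-bit to a 16-bit view yields 0b1111, since each 32-bit component covers
 * two 16-bit components.  Going the other way, 0b1100 at 16 bits becomes
 * 0b0010 at 32 bits: 16-bit components 2 and 3 together form 32-bit
 * component 1.
 */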

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   shader->gctx = gc_context(shader);

#ifndef NDEBUG
   nir_process_debug_variable();
#endif

   exec_list_make_empty(&shader->variables);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;

   return shader;
}
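
/* Most in-tree users do not call nir_shader_create() directly but go through
 * nir_builder, which wraps it and also creates the entrypoint function.  A
 * hedged sketch (assuming the driver's compiler options are at hand):
 *
 *    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
 *                                                   options, "my fs");
 *    ... emit instructions through &b ...
 *    nir_shader *s = b.shader;
 */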

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      return;

   case nir_var_shader_temp:
   case nir_var_shader_in:
   case nir_var_shader_out:
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_image:
   case nir_var_mem_shared:
   case nir_var_system_value:
   case nir_var_mem_push_const:
   case nir_var_mem_constant:
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
   case nir_var_mem_task_payload:
   case nir_var_mem_node_payload:
   case nir_var_mem_node_payload_in:
   case nir_var_mem_global:
      break;

   default:
      assert(!"invalid mode");
      return;
   }

   exec_list_push_tail(&shader->variables, &var->node);
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX &&
        shader->info.stage != MESA_SHADER_KERNEL) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_variable *
nir_state_variable_create(nir_shader *shader,
                          const struct glsl_type *type,
                          const char *name,
                          const gl_state_index16 tokens[STATE_LENGTH])
{
   nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
   var->num_state_slots = 1;
   var->state_slots = rzalloc_array(var, nir_state_slot, 1);
   memcpy(var->state_slots[0].tokens, tokens,
          sizeof(var->state_slots[0].tokens));
   shader->num_uniforms++;
   return var;
}

nir_variable *
nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                                  const struct glsl_type *type)
{
   /* Only vector/scalar and unsized-array (arrayed-IO) types are supported,
    * because otherwise we don't know by how much to increment
    * num_inputs/outputs.
    */
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));

   const char *name;
   switch (mode) {
   case nir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_VERTEX)
         name = gl_vert_attrib_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_shader_out:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         name = gl_frag_result_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_system_value:
      name = gl_system_value_name(location);
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   nir_variable *var = nir_variable_create(shader, mode, type, name);
   var->data.location = location;

   switch (mode) {
   case nir_var_shader_in:
      var->data.driver_location = shader->num_inputs++;
      break;

   case nir_var_shader_out:
      var->data.driver_location = shader->num_outputs++;
      break;

   case nir_var_system_value:
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   return var;
}
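
/* A hedged usage sketch: creating a fragment-shader color output.
 * FRAG_RESULT_DATA0 and glsl_vec4_type() are existing helpers; the
 * surrounding driver code is hypothetical:
 *
 *    nir_variable *out =
 *       nir_create_variable_with_location(shader, nir_var_shader_out,
 *                                         FRAG_RESULT_DATA0,
 *                                         glsl_vec4_type());
 */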

nir_variable *
nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                               const struct glsl_type *type)
{
   nir_variable *var = nir_find_variable_with_location(shader, mode, location);
   if (var) {
      /* If this shader has location_fracs, this builder function is not suitable. */
      assert(var->data.location_frac == 0);

      /* The variable for the slot should match what we expected. */
      assert(type == var->type);
      return var;
   }

   return nir_create_variable_with_location(shader, mode, location, type);
}

nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.driver_location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_state_variable(nir_shader *s,
                        gl_state_index16 tokens[STATE_LENGTH])
{
   nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          !memcmp(var->state_slots[0].tokens, tokens,
                  sizeof(var->state_slots[0].tokens)))
         return var;
   }
   return NULL;
}

nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
                                                       unsigned texture_index)
{
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      unsigned size =
         glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
      if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
           glsl_type_is_sampler(glsl_without_array(var->type))) &&
          (var->data.binding == texture_index ||
           (var->data.binding < texture_index &&
            var->data.binding + size > texture_index)))
         return var;
   }
   return NULL;
}

/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android.  So we stuff the cmp function into
 * each array element.  It's a bit messy and burns more memory but the list of
 * variables should never be all that long.
 */
struct var_cmp {
   nir_variable *var;
   int (*cmp)(const nir_variable *, const nir_variable *);
};

static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
{
   const struct var_cmp *a = _a;
   const struct var_cmp *b = _b;
   assert(a->cmp == b->cmp);
   return a->cmp(a->var, b->var);
}

void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
{
   unsigned num_vars = 0;
   nir_foreach_variable_with_modes(var, shader, modes) {
      ++num_vars;
   }
   struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
   unsigned i = 0;
   nir_foreach_variable_with_modes_safe(var, shader, modes) {
      exec_node_remove(&var->node);
      vars[i++] = (struct var_cmp){
         .var = var,
         .cmp = cmp,
      };
   }
   assert(i == num_vars);

   util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);

   for (i = 0; i < num_vars; i++)
      exec_list_push_tail(&shader->variables, &vars[i].var->node);

   ralloc_free(vars);
}
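
/* A hedged usage sketch: sorting all inputs and outputs by location (the
 * comparison callback here is hypothetical):
 *
 *    static int
 *    cmp_loc(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_loc,
 *                                  nir_var_shader_in | nir_var_shader_out);
 */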

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;
   func->is_preamble = false;
   func->dont_inline = false;
   func->should_inline = false;
   func->is_subroutine = false;
   func->subroutine_index = 0;
   func->num_subroutine_types = 0;
   func->subroutine_types = NULL;

   /* Only meaningful for shader libraries, so don't export by default. */
   func->is_exported = false;

   return func;
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
{
   dest->src = nir_src_for_ssa(src->src.ssa);
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
{
   static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);

   const nir_alu_src *src = &alu->src[srcn];
   unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);

   return (src->src.ssa->num_components == num_components) &&
          (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
}

static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;
   impl->preamble = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->locals);
   impl->ssa_alloc = 0;
   impl->num_blocks = 0;
   impl->valid_metadata = nir_metadata_none;
   impl->structured = true;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
   nir_function_set_impl(function, impl);
   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->ssa = NULL;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);
   /* Assume that loops are divergent until proven otherwise */
   loop->divergent = true;

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   exec_list_make_empty(&loop->continue_list);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
   instr_init(&instr->instr, nir_instr_type_jump);
   src_init(&instr->condition);
   instr->type = type;
   instr->target = NULL;
   instr->else_target = NULL;

   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   nir_intrinsic_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

static int8_t default_tg4_offsets[4][2] = {
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
   instr_init(&instr->instr, nir_instr_type_tex);

   instr->num_srcs = num_srcs;
   instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_def *src)
{
   nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   gc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i - 1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}
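
/* A hedged usage sketch, in the style of a lowering pass: forcing an
 * explicit zero LOD onto a texture instruction (assuming a nir_builder "b"
 * positioned at the right cursor):
 *
 *    tex->op = nir_texop_txl;
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(&b, 0));
 */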

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
   instr_init(&instr->instr, nir_instr_type_phi);

   exec_list_make_empty(&instr->srcs);

   return instr;
}

/**
 * Adds a new source to a NIR phi instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader.  If it is, you have to do:
 *
 * list_addtail(&phi_src->src.use_link, &src->uses);
 */
nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
{
   nir_phi_src *phi_src;

   phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
   phi_src->pred = pred;
   phi_src->src = nir_src_for_ssa(src);
   nir_src_set_parent_instr(&phi_src->src, &instr->instr);
   exec_list_push_tail(&instr->srcs, &phi_src->node);

   return phi_src;
}
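
/* A hedged usage sketch: merging a value from each side of an if.  The
 * blocks and defs (then_blk, else_blk, then_def, else_def, merge_blk) are
 * hypothetical.  The phi's sources are added before the phi is inserted, so
 * nir_instr_insert() sets up the def/use links:
 *
 *    nir_phi_instr *phi = nir_phi_instr_create(shader);
 *    nir_def_init(&phi->instr, &phi->def, then_def->num_components,
 *                 then_def->bit_size);
 *    nir_phi_instr_add_src(phi, then_blk, then_def);
 *    nir_phi_instr_add_src(phi, else_blk, else_def);
 *    nir_instr_insert(nir_before_block(merge_blk), &phi->instr);
 */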

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_debug_info_instr *
nir_debug_info_instr_create(nir_shader *shader, nir_debug_info_type type,
                            uint32_t string_length)
{
   uint32_t additional_size = 0;
   if (type == nir_debug_info_string)
      additional_size = string_length + 1;

   nir_debug_info_instr *instr = gc_zalloc_size(
      shader->gctx, sizeof(nir_debug_info_instr) + additional_size, 1);
   instr_init(&instr->instr, nir_instr_type_debug_info);

   instr->type = type;

   if (type == nir_debug_info_string)
      instr->string_length = string_length;

   return instr;
}

nir_undef_instr *
nir_undef_instr_create(nir_shader *shader,
                       unsigned num_components,
                       unsigned bit_size)
{
   nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
   instr_init(&instr->instr, nir_instr_type_undef);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 1:  v.b   = i & 1; break;
   case 8:  v.i8  = i;     break;
   case 16: v.i16 = i;     break;
   case 32: v.i32 = i;     break;
   case 64: v.i64 = i;     break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   nir_src_set_parent_instr(src, instr);
   list_addtail(&src->use_link, &src->ssa->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_defs;
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void)last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);

   nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
   impl->valid_metadata &= ~nir_metadata_instr_index;
}
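
/* A hedged usage sketch: appending a freshly created instruction at the end
 * of a block:
 *
 *    nir_instr_insert(nir_after_block(block), &new_instr->instr);
 *
 * nir_builder wraps this with cursor bookkeeping, so most passes go through
 * nir_builder_instr_insert() instead of calling this directly.
 */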

bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
{
   /* If the cursor happens to refer to this instruction (either before or
    * after), don't do anything.
    */
   if ((cursor.option == nir_cursor_before_instr ||
        cursor.option == nir_cursor_after_instr) &&
       cursor.instr == instr)
      return false;

   nir_instr_remove(instr);
   nir_instr_insert(cursor, instr);
   return true;
}

static bool
src_is_valid(const nir_src *src)
{
   return (src->ssa != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void)state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, remove_use_cb, instr);
}

void
nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

void
nir_instr_free(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex:
      gc_free(nir_instr_as_tex(instr)->src);
      break;

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_foreach_phi_src_safe(phi_src, phi)
         gc_free(phi_src);
      break;
   }

   default:
      break;
   }

   gc_free(instr);
}

void
nir_instr_free_list(struct exec_list *list)
{
   struct exec_node *node;
   while ((node = exec_list_pop_head(list))) {
      nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
      nir_instr_free(removed_instr);
   }
}

static bool
nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
{
   bool *live = state;

   if (!nir_def_is_unused(def)) {
      *live = true;
      return false;
   } else {
      return true;
   }
}

static bool
nir_instr_free_and_dce_is_live(nir_instr *instr)
{
   /* Note: don't have to worry about jumps because they don't have dests to
    * become unused.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
         return true;
   }

   bool live = false;
   nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
   return live;
}

static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
{
   nir_instr_worklist *wl = state;

   list_del(&src->use_link);
   if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
      nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);

   /* Stop nir_instr_remove from trying to delete the link again. */
   src->ssa = NULL;

   return true;
}

static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
{
   nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
}

/**
 * Frees an instruction and any SSA defs that it used that are now dead,
 * returning a nir_cursor where the instruction previously was.
 */
nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
{
   nir_instr_worklist *worklist = nir_instr_worklist_create();

   nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
   nir_cursor c = nir_instr_remove(instr);

   struct exec_list to_free;
   exec_list_make_empty(&to_free);

   nir_instr *dce_instr;
   while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
      nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);

      /* If we're removing the instr where our cursor is, then we have to
       * point the cursor elsewhere.
       */
      if ((c.option == nir_cursor_before_instr ||
           c.option == nir_cursor_after_instr) &&
          c.instr == dce_instr)
         c = nir_instr_remove(dce_instr);
      else
         nir_instr_remove(dce_instr);
      exec_list_push_tail(&to_free, &dce_instr->node);
   }

   nir_instr_free_list(&to_free);

   nir_instr_worklist_destroy(worklist);

   return c;
}
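
/* A hedged usage sketch from a typical rewrite pass: replace an ALU result
 * with a new def, then let the worklist-driven DCE above clean up the
 * instruction and any operands that just became dead:
 *
 *    nir_def_rewrite_uses(&alu->def, replacement);
 *    nir_instr_free_and_dce(&alu->instr);
 *
 * The returned cursor can be used to keep iterating safely from where the
 * instruction used to be.
 */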

/*@}*/

nir_def *
nir_instr_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return &nir_instr_as_alu(instr)->def;

   case nir_instr_type_deref:
      return &nir_instr_as_deref(instr)->def;

   case nir_instr_type_tex:
      return &nir_instr_as_tex(instr)->def;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         return &intrin->def;
      } else {
         return NULL;
      }
   }

   case nir_instr_type_phi:
      return &nir_instr_as_phi(instr)->def;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;

   case nir_instr_type_undef:
      return &nir_instr_as_undef(instr)->def;

   case nir_instr_type_call:
   case nir_instr_type_jump:
   case nir_instr_type_debug_info:
      return NULL;
   }

   unreachable("Invalid instruction type");
}

bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
{
   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
      if (block->successors[i] == NULL)
         continue;

      nir_foreach_phi(phi, block->successors[i]) {
         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == block) {
               if (!cb(&phi_src->src, state))
                  return false;
            }
         }
      }
   }

   return true;
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(f);  break;
   case 32: v.f32 = f;                       break;
   case 64: v.f64 = f;                       break;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}

const char *
nir_src_as_string(nir_src src)
{
   nir_debug_info_instr *di = nir_src_as_debug_info(src);
   if (di && di->type == nir_debug_info_string)
      return di->string;

   return NULL;
}

/**
 * Returns true if the source is known to be always uniform.  Otherwise it
 * returns false, which means the source may or may not be uniform; it just
 * can't be determined.
 *
 * For a more precise analysis of uniform values, use nir_divergence_analysis.
 */
bool
nir_src_is_always_uniform(nir_src src)
{
   /* Constants are trivially uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
      /* As are uniform variables */
      if (intr->intrinsic == nir_intrinsic_load_uniform &&
          nir_src_is_always_uniform(intr->src[0]))
         return true;
      /* From the Vulkan specification 15.6.1. Push Constant Interface:
       * "Any member of a push constant block that is declared as an array must
       * only be accessed with dynamically uniform indices."
       */
      if (intr->intrinsic == nir_intrinsic_load_push_constant)
         return true;
      if (intr->intrinsic == nir_intrinsic_load_deref &&
          nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
         return true;
   }

   /* Operating together uniform expressions produces a uniform result */
   if (src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_src_is_always_uniform(alu->src[i].src))
            return false;
      }

      return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with uniform arguments.
    */
   return false;
}
1482 
1483 nir_block *
nir_src_get_block(nir_src * src)1484 nir_src_get_block(nir_src *src)
1485 {
1486    if (nir_src_is_if(src))
1487       return nir_cf_node_cf_tree_prev(&nir_src_parent_if(src)->cf_node);
1488    else if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
1489       return list_entry(src, nir_phi_src, src)->pred;
1490    else
1491       return nir_src_parent_instr(src)->block;
1492 }
1493 
1494 static void
src_remove_all_uses(nir_src * src)1495 src_remove_all_uses(nir_src *src)
1496 {
1497    if (src && src_is_valid(src))
1498       list_del(&src->use_link);
1499 }
1500 
1501 static void
src_add_all_uses(nir_src * src,nir_instr * parent_instr,nir_if * parent_if)1502 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1503 {
1504    if (!src)
1505       return;
1506 
1507    if (!src_is_valid(src))
1508       return;
1509 
1510    if (parent_instr) {
1511       nir_src_set_parent_instr(src, parent_instr);
1512    } else {
1513       assert(parent_if);
1514       nir_src_set_parent_if(src, parent_if);
1515    }
1516 
1517    list_addtail(&src->use_link, &src->ssa->uses);
1518 }
1519 
1520 void
nir_instr_init_src(nir_instr * instr,nir_src * src,nir_def * def)1521 nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
1522 {
1523    *src = nir_src_for_ssa(def);
1524    src_add_all_uses(src, instr, NULL);
1525 }
1526 
1527 void
nir_instr_clear_src(nir_instr * instr,nir_src * src)1528 nir_instr_clear_src(nir_instr *instr, nir_src *src)
1529 {
1530    src_remove_all_uses(src);
1531    *src = NIR_SRC_INIT;
1532 }
1533 
1534 void
nir_instr_move_src(nir_instr * dest_instr,nir_src * dest,nir_src * src)1535 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1536 {
1537    assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);
1538 
1539    src_remove_all_uses(dest);
1540    src_remove_all_uses(src);
1541    *dest = *src;
1542    *src = NIR_SRC_INIT;
1543    src_add_all_uses(dest, dest_instr, NULL);
1544 }
1545 
1546 void
nir_def_init(nir_instr * instr,nir_def * def,unsigned num_components,unsigned bit_size)1547 nir_def_init(nir_instr *instr, nir_def *def,
1548              unsigned num_components,
1549              unsigned bit_size)
1550 {
1551    def->parent_instr = instr;
1552    list_inithead(&def->uses);
1553    def->num_components = num_components;
1554    def->bit_size = bit_size;
1555    def->divergent = true; /* This is the safer default */
1556 
1557    if (instr->block) {
1558       nir_function_impl *impl =
1559          nir_cf_node_get_function(&instr->block->cf_node);
1560 
1561       def->index = impl->ssa_alloc++;
1562 
1563       impl->valid_metadata &= ~nir_metadata_live_defs;
1564    } else {
1565       def->index = UINT_MAX;
1566    }
1567 }
1568 
1569 void
nir_def_rewrite_uses(nir_def * def,nir_def * new_ssa)1570 nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
1571 {
1572    assert(def != new_ssa);
1573    nir_foreach_use_including_if_safe(use_src, def) {
1574       nir_src_rewrite(use_src, new_ssa);
1575    }
1576 }
1577 
1578 void
1579 nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1580 {
1581    nir_def_rewrite_uses(def, new_src.ssa);
1582 }
1583 
1584 static bool
1585 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1586 {
1587    assert(start->block == end->block);
1588 
1589    if (between->block != start->block)
1590       return false;
1591 
1592    /* Search backwards looking for "between" */
1593    while (start != end) {
1594       if (between == end)
1595          return true;
1596 
1597       end = nir_instr_prev(end);
1598       assert(end);
1599    }
1600 
1601    return false;
1602 }
1603 
1604 /* Replaces all uses of the given SSA def with the given source but only if
1605  * the use comes after the after_me instruction.  This can be useful if you
1606  * are emitting code to fix up the result of some instruction: you can freely
1607  * use the result in that code and then call rewrite_uses_after and pass the
1608  * last fixup instruction as after_me and it will replace all of the uses you
1609  * want without touching the fixup code.
1610  *
1611  * This function assumes that after_me is in the same block as
1612  * def->parent_instr and that after_me comes after def->parent_instr.
1613  */
1614 void
1615 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1616                            nir_instr *after_me)
1617 {
1618    if (def == new_ssa)
1619       return;
1620 
1621    nir_foreach_use_including_if_safe(use_src, def) {
1622       if (!nir_src_is_if(use_src)) {
1623          assert(nir_src_parent_instr(use_src) != def->parent_instr);
1624 
1625          /* Since def already dominates all of its uses, the only way a use can
1626           * not be dominated by after_me is if it is between def and after_me in
1627           * the instruction list.
1628           */
1629          if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1630             continue;
1631       }
1632 
1633       nir_src_rewrite(use_src, new_ssa);
1634    }
1635 }
1636 
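/* Editor's usage sketch (not part of the original file): the fixup pattern
 * described above, assuming a nir_builder `b` positioned right after
 * `def`'s instruction.  The fsat may freely read `def`; only uses after it
 * are rewritten.
 *
 *    nir_def *fixed = nir_fsat(b, def);
 *    nir_def_rewrite_uses_after(def, fixed, fixed->parent_instr);
 */
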
1637 static nir_def *
1638 get_store_value(nir_intrinsic_instr *intrin)
1639 {
1640    assert(nir_intrinsic_has_write_mask(intrin));
1641    /* deref stores have the deref in src[0] and the store value in src[1] */
1642    if (intrin->intrinsic == nir_intrinsic_store_deref ||
1643        intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1644       return intrin->src[1].ssa;
1645 
1646    /* all other stores have the store value in src[0] */
1647    return intrin->src[0].ssa;
1648 }
1649 
1650 nir_component_mask_t
1651 nir_src_components_read(const nir_src *src)
1652 {
1653    assert(nir_src_parent_instr(src));
1654 
1655    if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1656       nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1657       nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1658       int src_idx = alu_src - &alu->src[0];
1659       assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1660       return nir_alu_instr_src_read_mask(alu, src_idx);
1661    } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1662       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1663       if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1664          return nir_intrinsic_write_mask(intrin);
1665       else
1666          return (1 << src->ssa->num_components) - 1;
1667    } else {
1668       return (1 << src->ssa->num_components) - 1;
1669    }
1670 }
1671 
1672 nir_component_mask_t
1673 nir_def_components_read(const nir_def *def)
1674 {
1675    nir_component_mask_t read_mask = 0;
1676 
1677    nir_foreach_use_including_if(use, def) {
1678       read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1679 
1680       if (read_mask == (1 << def->num_components) - 1)
1681          return read_mask;
1682    }
1683 
1684    return read_mask;
1685 }
1686 
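/* Worked example (editor's note): for a vec4 def whose only use is an ALU
 * source swizzled .xz, nir_def_components_read() returns 0b0101, so
 * components y and w are dead.
 */
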
1687 bool
1688 nir_def_all_uses_are_fsat(const nir_def *def)
1689 {
1690    nir_foreach_use(src, def) {
1691       if (nir_src_is_if(src))
1692          return false;
1693 
1694       nir_instr *use = nir_src_parent_instr(src);
1695       if (use->type != nir_instr_type_alu)
1696          return false;
1697 
1698       nir_alu_instr *alu = nir_instr_as_alu(use);
1699       if (alu->op != nir_op_fsat)
1700          return false;
1701    }
1702 
1703    return true;
1704 }
1705 
1706 nir_block *
1707 nir_block_unstructured_next(nir_block *block)
1708 {
1709    if (block == NULL) {
1710       /* nir_foreach_block_unstructured_safe() will call this function on a
1711        * NULL block after the last iteration, but it won't use the result so
1712        * just return NULL here.
1713        */
1714       return NULL;
1715    }
1716 
1717    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1718    if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1719       return NULL;
1720 
1721    if (cf_next && cf_next->type == nir_cf_node_block)
1722       return nir_cf_node_as_block(cf_next);
1723 
1724    return nir_block_cf_tree_next(block);
1725 }
1726 
1727 nir_block *
1728 nir_unstructured_start_block(nir_function_impl *impl)
1729 {
1730    return nir_start_block(impl);
1731 }
1732 
1733 nir_block *
1734 nir_block_cf_tree_next(nir_block *block)
1735 {
1736    if (block == NULL) {
1737       /* nir_foreach_block_safe() will call this function on a NULL block
1738        * after the last iteration, but it won't use the result so just return
1739        * NULL here.
1740        */
1741       return NULL;
1742    }
1743 
1744    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1745 
1746    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1747    if (cf_next)
1748       return nir_cf_node_cf_tree_first(cf_next);
1749 
1750    nir_cf_node *parent = block->cf_node.parent;
1751    if (parent->type == nir_cf_node_function)
1752       return NULL;
1753 
1754    /* Is this the last block of a cf_node? Return the following block */
1755    if (block == nir_cf_node_cf_tree_last(parent))
1756       return nir_cf_node_as_block(nir_cf_node_next(parent));
1757 
1758    switch (parent->type) {
1759    case nir_cf_node_if: {
1760       /* We are at the end of the if. Go to the beginning of the else */
1761       nir_if *if_stmt = nir_cf_node_as_if(parent);
1762       assert(block == nir_if_last_then_block(if_stmt));
1763       return nir_if_first_else_block(if_stmt);
1764    }
1765 
1766    case nir_cf_node_loop: {
1767       /* We are at the end of the body and there is a continue construct */
1768       nir_loop *loop = nir_cf_node_as_loop(parent);
1769       assert(block == nir_loop_last_block(loop) &&
1770              nir_loop_has_continue_construct(loop));
1771       return nir_loop_first_continue_block(loop);
1772    }
1773 
1774    default:
1775       unreachable("unknown cf node type");
1776    }
1777 }
1778 
1779 nir_block *
1780 nir_block_cf_tree_prev(nir_block *block)
1781 {
1782    if (block == NULL) {
1783       /* do this for consistency with nir_block_cf_tree_next() */
1784       return NULL;
1785    }
1786 
1787    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1788 
1789    nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1790    if (cf_prev)
1791       return nir_cf_node_cf_tree_last(cf_prev);
1792 
1793    nir_cf_node *parent = block->cf_node.parent;
1794    if (parent->type == nir_cf_node_function)
1795       return NULL;
1796 
1797    /* Is this the first block of a cf_node? Return the previous block */
1798    if (block == nir_cf_node_cf_tree_first(parent))
1799       return nir_cf_node_as_block(nir_cf_node_prev(parent));
1800 
1801    switch (parent->type) {
1802    case nir_cf_node_if: {
1803       /* We are at the beginning of the else. Go to the end of the if */
1804       nir_if *if_stmt = nir_cf_node_as_if(parent);
1805       assert(block == nir_if_first_else_block(if_stmt));
1806       return nir_if_last_then_block(if_stmt);
1807    }
1808    case nir_cf_node_loop: {
1809       /* We are at the beginning of the continue construct. */
1810       nir_loop *loop = nir_cf_node_as_loop(parent);
1811       assert(nir_loop_has_continue_construct(loop) &&
1812              block == nir_loop_first_continue_block(loop));
1813       return nir_loop_last_block(loop);
1814    }
1815 
1816    default:
1817       unreachable("unknown cf node type");
1818    }
1819 }
1820 
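/* Editor's sketch of a manual forward walk built on these helpers (roughly
 * what nir_foreach_block() expands to; process_block() is a placeholder):
 *
 *    for (nir_block *b = nir_start_block(impl); b != NULL;
 *         b = nir_block_cf_tree_next(b))
 *       process_block(b);
 */
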
1821 nir_block *
1822 nir_cf_node_cf_tree_first(nir_cf_node *node)
1823 {
1824    switch (node->type) {
1825    case nir_cf_node_function: {
1826       nir_function_impl *impl = nir_cf_node_as_function(node);
1827       return nir_start_block(impl);
1828    }
1829 
1830    case nir_cf_node_if: {
1831       nir_if *if_stmt = nir_cf_node_as_if(node);
1832       return nir_if_first_then_block(if_stmt);
1833    }
1834 
1835    case nir_cf_node_loop: {
1836       nir_loop *loop = nir_cf_node_as_loop(node);
1837       return nir_loop_first_block(loop);
1838    }
1839 
1840    case nir_cf_node_block: {
1841       return nir_cf_node_as_block(node);
1842    }
1843 
1844    default:
1845       unreachable("unknown node type");
1846    }
1847 }
1848 
1849 nir_block *
1850 nir_cf_node_cf_tree_last(nir_cf_node *node)
1851 {
1852    switch (node->type) {
1853    case nir_cf_node_function: {
1854       nir_function_impl *impl = nir_cf_node_as_function(node);
1855       return nir_impl_last_block(impl);
1856    }
1857 
1858    case nir_cf_node_if: {
1859       nir_if *if_stmt = nir_cf_node_as_if(node);
1860       return nir_if_last_else_block(if_stmt);
1861    }
1862 
1863    case nir_cf_node_loop: {
1864       nir_loop *loop = nir_cf_node_as_loop(node);
1865       if (nir_loop_has_continue_construct(loop))
1866          return nir_loop_last_continue_block(loop);
1867       else
1868          return nir_loop_last_block(loop);
1869    }
1870 
1871    case nir_cf_node_block: {
1872       return nir_cf_node_as_block(node);
1873    }
1874 
1875    default:
1876       unreachable("unknown node type");
1877    }
1878 }
1879 
1880 nir_block *
1881 nir_cf_node_cf_tree_next(nir_cf_node *node)
1882 {
1883    if (node->type == nir_cf_node_block)
1884       return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1885    else if (node->type == nir_cf_node_function)
1886       return NULL;
1887    else
1888       return nir_cf_node_as_block(nir_cf_node_next(node));
1889 }
1890 
1891 nir_block *
1892 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1893 {
1894    if (node->type == nir_cf_node_block)
1895       return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1896    else if (node->type == nir_cf_node_function)
1897       return NULL;
1898    else
1899       return nir_cf_node_as_block(nir_cf_node_prev(node));
1900 }
1901 
1902 nir_if *
1903 nir_block_get_following_if(nir_block *block)
1904 {
1905    if (exec_node_is_tail_sentinel(&block->cf_node.node))
1906       return NULL;
1907 
1908    if (nir_cf_node_is_last(&block->cf_node))
1909       return NULL;
1910 
1911    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1912 
1913    if (next_node->type != nir_cf_node_if)
1914       return NULL;
1915 
1916    return nir_cf_node_as_if(next_node);
1917 }
1918 
1919 nir_loop *
1920 nir_block_get_following_loop(nir_block *block)
1921 {
1922    if (exec_node_is_tail_sentinel(&block->cf_node.node))
1923       return NULL;
1924 
1925    if (nir_cf_node_is_last(&block->cf_node))
1926       return NULL;
1927 
1928    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1929 
1930    if (next_node->type != nir_cf_node_loop)
1931       return NULL;
1932 
1933    return nir_cf_node_as_loop(next_node);
1934 }
1935 
1936 static int
1937 compare_block_index(const void *p1, const void *p2)
1938 {
1939    const nir_block *block1 = *((const nir_block **)p1);
1940    const nir_block *block2 = *((const nir_block **)p2);
1941 
1942    return (int)block1->index - (int)block2->index;
1943 }
1944 
1945 nir_block **
1946 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1947 {
1948    nir_block **preds =
1949       ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1950 
1951    unsigned i = 0;
1952    set_foreach(block->predecessors, entry)
1953       preds[i++] = (nir_block *)entry->key;
1954    assert(i == block->predecessors->entries);
1955 
1956    qsort(preds, block->predecessors->entries, sizeof(nir_block *),
1957          compare_block_index);
1958 
1959    return preds;
1960 }
1961 
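/* Editor's usage sketch: the sort key is block->index, so block indices
 * must be valid first; `mem_ctx` is any ralloc context owned by the caller
 * and process_pred() is a placeholder.
 *
 *    nir_index_blocks(impl);
 *    nir_block **preds = nir_block_get_predecessors_sorted(block, mem_ctx);
 *    for (unsigned i = 0; i < block->predecessors->entries; i++)
 *       process_pred(preds[i]);
 */
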
1962 void
1963 nir_index_blocks(nir_function_impl *impl)
1964 {
1965    unsigned index = 0;
1966 
1967    if (impl->valid_metadata & nir_metadata_block_index)
1968       return;
1969 
1970    nir_foreach_block_unstructured(block, impl) {
1971       block->index = index++;
1972    }
1973 
1974    /* The end_block isn't really part of the program, which is why its index
1975     * is >= num_blocks.
1976     */
1977    impl->num_blocks = impl->end_block->index = index;
1978 }
1979 
1980 static bool
1981 index_ssa_def_cb(nir_def *def, void *state)
1982 {
1983    unsigned *index = (unsigned *)state;
1984    def->index = (*index)++;
1985 
1986    return true;
1987 }
1988 
1989 /**
1990  * The indices are applied top-to-bottom which has the very nice property
1991  * that, if A dominates B, then A->index <= B->index.
1992  */
1993 void
1994 nir_index_ssa_defs(nir_function_impl *impl)
1995 {
1996    unsigned index = 0;
1997 
1998    impl->valid_metadata &= ~nir_metadata_live_defs;
1999 
2000    nir_foreach_block_unstructured(block, impl) {
2001       nir_foreach_instr(instr, block)
2002          nir_foreach_def(instr, index_ssa_def_cb, &index);
2003    }
2004 
2005    impl->ssa_alloc = index;
2006 }
2007 
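/* Editor's note on the property above: after nir_index_ssa_defs(impl),
 * a->index > b->index proves that def `a` cannot dominate def `b`, giving
 * a cheap early-out before any full dominance query.
 */
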
2008 /**
2009  * The indices are applied top-to-bottom which has the very nice property
2010  * that, if A dominates B, then A->index <= B->index.
2011  */
2012 unsigned
2013 nir_index_instrs(nir_function_impl *impl)
2014 {
2015    unsigned index = 0;
2016 
2017    nir_foreach_block(block, impl) {
2018       block->start_ip = index++;
2019 
2020       nir_foreach_instr(instr, block)
2021          instr->index = index++;
2022 
2023       block->end_ip = index++;
2024    }
2025 
2026    return index;
2027 }
2028 
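/* Editor's note: every instruction index falls strictly between its
 * block's start_ip and end_ip, so a containment test is just
 *
 *    bool inside = block->start_ip < instr->index &&
 *                  instr->index < block->end_ip;
 */
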
2029 void
2030 nir_shader_clear_pass_flags(nir_shader *shader)
2031 {
2032    nir_foreach_function_impl(impl, shader) {
2033       nir_foreach_block(block, impl) {
2034          nir_foreach_instr(instr, block) {
2035             instr->pass_flags = 0;
2036          }
2037       }
2038    }
2039 }
2040 
2041 unsigned
2042 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2043 {
2044    unsigned count = 0;
2045    nir_foreach_variable_with_modes(var, shader, modes)
2046       var->index = count++;
2047    return count;
2048 }
2049 
2050 unsigned
2051 nir_function_impl_index_vars(nir_function_impl *impl)
2052 {
2053    unsigned count = 0;
2054    nir_foreach_function_temp_variable(var, impl)
2055       var->index = count++;
2056    return count;
2057 }
2058 
2059 static nir_instr *
2060 cursor_next_instr(nir_cursor cursor)
2061 {
2062    switch (cursor.option) {
2063    case nir_cursor_before_block:
2064       for (nir_block *block = cursor.block; block;
2065            block = nir_block_cf_tree_next(block)) {
2066          nir_instr *instr = nir_block_first_instr(block);
2067          if (instr)
2068             return instr;
2069       }
2070       return NULL;
2071 
2072    case nir_cursor_after_block:
2073       cursor.block = nir_block_cf_tree_next(cursor.block);
2074       if (cursor.block == NULL)
2075          return NULL;
2076 
2077       cursor.option = nir_cursor_before_block;
2078       return cursor_next_instr(cursor);
2079 
2080    case nir_cursor_before_instr:
2081       return cursor.instr;
2082 
2083    case nir_cursor_after_instr:
2084       if (nir_instr_next(cursor.instr))
2085          return nir_instr_next(cursor.instr);
2086 
2087       cursor.option = nir_cursor_after_block;
2088       cursor.block = cursor.instr->block;
2089       return cursor_next_instr(cursor);
2090    }
2091 
2092    unreachable("Inavlid cursor option");
2093 }
2094 
2095 bool
2096 nir_function_impl_lower_instructions(nir_function_impl *impl,
2097                                      nir_instr_filter_cb filter,
2098                                      nir_lower_instr_cb lower,
2099                                      void *cb_data)
2100 {
2101    nir_builder b = nir_builder_create(impl);
2102 
2103    nir_metadata preserved = nir_metadata_control_flow;
2104 
2105    bool progress = false;
2106    nir_cursor iter = nir_before_impl(impl);
2107    nir_instr *instr;
2108    while ((instr = cursor_next_instr(iter)) != NULL) {
2109       if (filter && !filter(instr, cb_data)) {
2110          iter = nir_after_instr(instr);
2111          continue;
2112       }
2113 
2114       nir_def *old_def = nir_instr_def(instr);
2115       struct list_head old_uses;
2116       if (old_def != NULL) {
2117          /* We're about to ask the callback to generate a replacement for instr.
2118           * Save off the uses from instr's SSA def so we know what uses to
2119           * rewrite later.  If we use nir_def_rewrite_uses, it fails in the
2120           * case where the generated replacement code uses the result of instr
2121           * itself.  If we use nir_def_rewrite_uses_after (which is the
2122           * normal solution to this problem), it doesn't work well if control-
2123           * flow is inserted as part of the replacement, doesn't handle cases
2124           * where the replacement is something consumed by instr, and suffers
2125           * from performance issues.  This is the only way to 100% guarantee
2126           * that we rewrite the correct set efficiently.
2127           */
2128 
2129          list_replace(&old_def->uses, &old_uses);
2130          list_inithead(&old_def->uses);
2131       }
2132 
2133       b.cursor = nir_after_instr(instr);
2134       nir_def *new_def = lower(&b, instr, cb_data);
2135       if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2136           new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2137          assert(old_def != NULL);
2138          if (new_def->parent_instr->block != instr->block)
2139             preserved = nir_metadata_none;
2140 
2141          list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2142             nir_src_rewrite(use_src, new_def);
2143 
2144          if (nir_def_is_unused(old_def)) {
2145             iter = nir_instr_free_and_dce(instr);
2146          } else {
2147             iter = nir_after_instr(instr);
2148          }
2149          progress = true;
2150       } else {
2151          /* We didn't end up lowering after all.  Put the uses back */
2152          if (old_def)
2153             list_replace(&old_uses, &old_def->uses);
2154 
2155          if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2156             /* Only instructions without a return value can be removed like this */
2157             assert(!old_def);
2158             iter = nir_instr_free_and_dce(instr);
2159             progress = true;
2160          } else
2161             iter = nir_after_instr(instr);
2162 
2163          if (new_def == NIR_LOWER_INSTR_PROGRESS)
2164             progress = true;
2165       }
2166    }
2167 
2168    if (progress) {
2169       nir_metadata_preserve(impl, preserved);
2170    } else {
2171       nir_metadata_preserve(impl, nir_metadata_all);
2172    }
2173 
2174    return progress;
2175 }
2176 
2177 bool
2178 nir_shader_lower_instructions(nir_shader *shader,
2179                               nir_instr_filter_cb filter,
2180                               nir_lower_instr_cb lower,
2181                               void *cb_data)
2182 {
2183    bool progress = false;
2184 
2185    nir_foreach_function_impl(impl, shader) {
2186       if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2187          progress = true;
2188    }
2189 
2190    return progress;
2191 }
2192 
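/* Editor's usage sketch (illustrative, not part of nir.c): lowering every
 * fdiv to fmul/frcp with a filter/lower callback pair.  For brevity the
 * sketch ignores source swizzles; a real pass would honor them.
 *
 *    static bool
 *    is_fdiv(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fdiv;
 *    }
 *
 *    static nir_def *
 *    lower_fdiv(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       return nir_fmul(b, alu->src[0].src.ssa,
 *                       nir_frcp(b, alu->src[1].src.ssa));
 *    }
 *
 *    nir_shader_lower_instructions(shader, is_fdiv, lower_fdiv, NULL);
 */
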
2193 /**
2194  * Returns true if the shader supports quad-based implicit derivatives for
2195  * texture sampling.
2196  */
2197 bool
2198 nir_shader_supports_implicit_lod(nir_shader *shader)
2199 {
2200    return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2201            (gl_shader_stage_uses_workgroup(shader->info.stage) &&
2202             shader->info.derivative_group != DERIVATIVE_GROUP_NONE));
2203 }
2204 
2205 nir_intrinsic_op
2206 nir_intrinsic_from_system_value(gl_system_value val)
2207 {
2208    switch (val) {
2209    case SYSTEM_VALUE_VERTEX_ID:
2210       return nir_intrinsic_load_vertex_id;
2211    case SYSTEM_VALUE_INSTANCE_ID:
2212       return nir_intrinsic_load_instance_id;
2213    case SYSTEM_VALUE_DRAW_ID:
2214       return nir_intrinsic_load_draw_id;
2215    case SYSTEM_VALUE_BASE_INSTANCE:
2216       return nir_intrinsic_load_base_instance;
2217    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2218       return nir_intrinsic_load_vertex_id_zero_base;
2219    case SYSTEM_VALUE_IS_INDEXED_DRAW:
2220       return nir_intrinsic_load_is_indexed_draw;
2221    case SYSTEM_VALUE_FIRST_VERTEX:
2222       return nir_intrinsic_load_first_vertex;
2223    case SYSTEM_VALUE_BASE_VERTEX:
2224       return nir_intrinsic_load_base_vertex;
2225    case SYSTEM_VALUE_INVOCATION_ID:
2226       return nir_intrinsic_load_invocation_id;
2227    case SYSTEM_VALUE_FRAG_COORD:
2228       return nir_intrinsic_load_frag_coord;
2229    case SYSTEM_VALUE_POINT_COORD:
2230       return nir_intrinsic_load_point_coord;
2231    case SYSTEM_VALUE_LINE_COORD:
2232       return nir_intrinsic_load_line_coord;
2233    case SYSTEM_VALUE_FRONT_FACE:
2234       return nir_intrinsic_load_front_face;
2235    case SYSTEM_VALUE_SAMPLE_ID:
2236       return nir_intrinsic_load_sample_id;
2237    case SYSTEM_VALUE_SAMPLE_POS:
2238       return nir_intrinsic_load_sample_pos;
2239    case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2240       return nir_intrinsic_load_sample_pos_or_center;
2241    case SYSTEM_VALUE_SAMPLE_MASK_IN:
2242       return nir_intrinsic_load_sample_mask_in;
2243    case SYSTEM_VALUE_LAYER_ID:
2244       return nir_intrinsic_load_layer_id;
2245    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2246       return nir_intrinsic_load_local_invocation_id;
2247    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2248       return nir_intrinsic_load_local_invocation_index;
2249    case SYSTEM_VALUE_WORKGROUP_ID:
2250       return nir_intrinsic_load_workgroup_id;
2251    case SYSTEM_VALUE_BASE_WORKGROUP_ID:
2252       return nir_intrinsic_load_base_workgroup_id;
2253    case SYSTEM_VALUE_WORKGROUP_INDEX:
2254       return nir_intrinsic_load_workgroup_index;
2255    case SYSTEM_VALUE_NUM_WORKGROUPS:
2256       return nir_intrinsic_load_num_workgroups;
2257    case SYSTEM_VALUE_PRIMITIVE_ID:
2258       return nir_intrinsic_load_primitive_id;
2259    case SYSTEM_VALUE_TESS_COORD:
2260       return nir_intrinsic_load_tess_coord;
2261    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2262       return nir_intrinsic_load_tess_level_outer;
2263    case SYSTEM_VALUE_TESS_LEVEL_INNER:
2264       return nir_intrinsic_load_tess_level_inner;
2265    case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2266       return nir_intrinsic_load_tess_level_outer_default;
2267    case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2268       return nir_intrinsic_load_tess_level_inner_default;
2269    case SYSTEM_VALUE_VERTICES_IN:
2270       return nir_intrinsic_load_patch_vertices_in;
2271    case SYSTEM_VALUE_HELPER_INVOCATION:
2272       return nir_intrinsic_load_helper_invocation;
2273    case SYSTEM_VALUE_COLOR0:
2274       return nir_intrinsic_load_color0;
2275    case SYSTEM_VALUE_COLOR1:
2276       return nir_intrinsic_load_color1;
2277    case SYSTEM_VALUE_VIEW_INDEX:
2278       return nir_intrinsic_load_view_index;
2279    case SYSTEM_VALUE_SUBGROUP_SIZE:
2280       return nir_intrinsic_load_subgroup_size;
2281    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2282       return nir_intrinsic_load_subgroup_invocation;
2283    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2284       return nir_intrinsic_load_subgroup_eq_mask;
2285    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2286       return nir_intrinsic_load_subgroup_ge_mask;
2287    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2288       return nir_intrinsic_load_subgroup_gt_mask;
2289    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2290       return nir_intrinsic_load_subgroup_le_mask;
2291    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2292       return nir_intrinsic_load_subgroup_lt_mask;
2293    case SYSTEM_VALUE_NUM_SUBGROUPS:
2294       return nir_intrinsic_load_num_subgroups;
2295    case SYSTEM_VALUE_SUBGROUP_ID:
2296       return nir_intrinsic_load_subgroup_id;
2297    case SYSTEM_VALUE_WORKGROUP_SIZE:
2298       return nir_intrinsic_load_workgroup_size;
2299    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2300       return nir_intrinsic_load_global_invocation_id;
2301    case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2302       return nir_intrinsic_load_base_global_invocation_id;
2303    case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2304       return nir_intrinsic_load_global_invocation_index;
2305    case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
2306       return nir_intrinsic_load_global_size;
2307    case SYSTEM_VALUE_WORK_DIM:
2308       return nir_intrinsic_load_work_dim;
2309    case SYSTEM_VALUE_USER_DATA_AMD:
2310       return nir_intrinsic_load_user_data_amd;
2311    case SYSTEM_VALUE_RAY_LAUNCH_ID:
2312       return nir_intrinsic_load_ray_launch_id;
2313    case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2314       return nir_intrinsic_load_ray_launch_size;
2315    case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2316       return nir_intrinsic_load_ray_world_origin;
2317    case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2318       return nir_intrinsic_load_ray_world_direction;
2319    case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2320       return nir_intrinsic_load_ray_object_origin;
2321    case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2322       return nir_intrinsic_load_ray_object_direction;
2323    case SYSTEM_VALUE_RAY_T_MIN:
2324       return nir_intrinsic_load_ray_t_min;
2325    case SYSTEM_VALUE_RAY_T_MAX:
2326       return nir_intrinsic_load_ray_t_max;
2327    case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2328       return nir_intrinsic_load_ray_object_to_world;
2329    case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2330       return nir_intrinsic_load_ray_world_to_object;
2331    case SYSTEM_VALUE_RAY_HIT_KIND:
2332       return nir_intrinsic_load_ray_hit_kind;
2333    case SYSTEM_VALUE_RAY_FLAGS:
2334       return nir_intrinsic_load_ray_flags;
2335    case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2336       return nir_intrinsic_load_ray_geometry_index;
2337    case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2338       return nir_intrinsic_load_ray_instance_custom_index;
2339    case SYSTEM_VALUE_CULL_MASK:
2340       return nir_intrinsic_load_cull_mask;
2341    case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2342       return nir_intrinsic_load_ray_triangle_vertex_positions;
2343    case SYSTEM_VALUE_MESH_VIEW_COUNT:
2344       return nir_intrinsic_load_mesh_view_count;
2345    case SYSTEM_VALUE_FRAG_SHADING_RATE:
2346       return nir_intrinsic_load_frag_shading_rate;
2347    case SYSTEM_VALUE_FULLY_COVERED:
2348       return nir_intrinsic_load_fully_covered;
2349    case SYSTEM_VALUE_FRAG_SIZE:
2350       return nir_intrinsic_load_frag_size;
2351    case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2352       return nir_intrinsic_load_frag_invocation_count;
2353    case SYSTEM_VALUE_SHADER_INDEX:
2354       return nir_intrinsic_load_shader_index;
2355    case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2356       return nir_intrinsic_load_coalesced_input_count;
2357    case SYSTEM_VALUE_WARPS_PER_SM_NV:
2358       return nir_intrinsic_load_warps_per_sm_nv;
2359    case SYSTEM_VALUE_SM_COUNT_NV:
2360       return nir_intrinsic_load_sm_count_nv;
2361    case SYSTEM_VALUE_WARP_ID_NV:
2362       return nir_intrinsic_load_warp_id_nv;
2363    case SYSTEM_VALUE_SM_ID_NV:
2364       return nir_intrinsic_load_sm_id_nv;
2365    default:
2366       unreachable("system value does not directly correspond to intrinsic");
2367    }
2368 }
2369 
2370 gl_system_value
2371 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2372 {
2373    switch (intrin) {
2374    case nir_intrinsic_load_vertex_id:
2375       return SYSTEM_VALUE_VERTEX_ID;
2376    case nir_intrinsic_load_instance_id:
2377       return SYSTEM_VALUE_INSTANCE_ID;
2378    case nir_intrinsic_load_draw_id:
2379       return SYSTEM_VALUE_DRAW_ID;
2380    case nir_intrinsic_load_base_instance:
2381       return SYSTEM_VALUE_BASE_INSTANCE;
2382    case nir_intrinsic_load_vertex_id_zero_base:
2383       return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2384    case nir_intrinsic_load_first_vertex:
2385       return SYSTEM_VALUE_FIRST_VERTEX;
2386    case nir_intrinsic_load_is_indexed_draw:
2387       return SYSTEM_VALUE_IS_INDEXED_DRAW;
2388    case nir_intrinsic_load_base_vertex:
2389       return SYSTEM_VALUE_BASE_VERTEX;
2390    case nir_intrinsic_load_invocation_id:
2391       return SYSTEM_VALUE_INVOCATION_ID;
2392    case nir_intrinsic_load_frag_coord:
2393       return SYSTEM_VALUE_FRAG_COORD;
2394    case nir_intrinsic_load_point_coord:
2395       return SYSTEM_VALUE_POINT_COORD;
2396    case nir_intrinsic_load_line_coord:
2397       return SYSTEM_VALUE_LINE_COORD;
2398    case nir_intrinsic_load_front_face:
2399       return SYSTEM_VALUE_FRONT_FACE;
2400    case nir_intrinsic_load_sample_id:
2401       return SYSTEM_VALUE_SAMPLE_ID;
2402    case nir_intrinsic_load_sample_pos:
2403       return SYSTEM_VALUE_SAMPLE_POS;
2404    case nir_intrinsic_load_sample_pos_or_center:
2405       return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2406    case nir_intrinsic_load_sample_mask_in:
2407       return SYSTEM_VALUE_SAMPLE_MASK_IN;
2408    case nir_intrinsic_load_layer_id:
2409       return SYSTEM_VALUE_LAYER_ID;
2410    case nir_intrinsic_load_local_invocation_id:
2411       return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2412    case nir_intrinsic_load_local_invocation_index:
2413       return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2414    case nir_intrinsic_load_num_workgroups:
2415       return SYSTEM_VALUE_NUM_WORKGROUPS;
2416    case nir_intrinsic_load_workgroup_id:
2417       return SYSTEM_VALUE_WORKGROUP_ID;
2418    case nir_intrinsic_load_base_workgroup_id:
2419       return SYSTEM_VALUE_BASE_WORKGROUP_ID;
2420    case nir_intrinsic_load_workgroup_index:
2421       return SYSTEM_VALUE_WORKGROUP_INDEX;
2422    case nir_intrinsic_load_primitive_id:
2423       return SYSTEM_VALUE_PRIMITIVE_ID;
2424    case nir_intrinsic_load_tess_coord:
2425    case nir_intrinsic_load_tess_coord_xy:
2426       return SYSTEM_VALUE_TESS_COORD;
2427    case nir_intrinsic_load_tess_level_outer:
2428       return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2429    case nir_intrinsic_load_tess_level_inner:
2430       return SYSTEM_VALUE_TESS_LEVEL_INNER;
2431    case nir_intrinsic_load_tess_level_outer_default:
2432       return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2433    case nir_intrinsic_load_tess_level_inner_default:
2434       return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2435    case nir_intrinsic_load_patch_vertices_in:
2436       return SYSTEM_VALUE_VERTICES_IN;
2437    case nir_intrinsic_load_helper_invocation:
2438       return SYSTEM_VALUE_HELPER_INVOCATION;
2439    case nir_intrinsic_load_color0:
2440       return SYSTEM_VALUE_COLOR0;
2441    case nir_intrinsic_load_color1:
2442       return SYSTEM_VALUE_COLOR1;
2443    case nir_intrinsic_load_view_index:
2444       return SYSTEM_VALUE_VIEW_INDEX;
2445    case nir_intrinsic_load_subgroup_size:
2446       return SYSTEM_VALUE_SUBGROUP_SIZE;
2447    case nir_intrinsic_load_subgroup_invocation:
2448       return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2449    case nir_intrinsic_load_subgroup_eq_mask:
2450       return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2451    case nir_intrinsic_load_subgroup_ge_mask:
2452       return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2453    case nir_intrinsic_load_subgroup_gt_mask:
2454       return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2455    case nir_intrinsic_load_subgroup_le_mask:
2456       return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2457    case nir_intrinsic_load_subgroup_lt_mask:
2458       return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2459    case nir_intrinsic_load_num_subgroups:
2460       return SYSTEM_VALUE_NUM_SUBGROUPS;
2461    case nir_intrinsic_load_subgroup_id:
2462       return SYSTEM_VALUE_SUBGROUP_ID;
2463    case nir_intrinsic_load_workgroup_size:
2464       return SYSTEM_VALUE_WORKGROUP_SIZE;
2465    case nir_intrinsic_load_global_invocation_id:
2466       return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2467    case nir_intrinsic_load_base_global_invocation_id:
2468       return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2469    case nir_intrinsic_load_global_invocation_index:
2470       return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2471    case nir_intrinsic_load_global_size:
2472       return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
2473    case nir_intrinsic_load_work_dim:
2474       return SYSTEM_VALUE_WORK_DIM;
2475    case nir_intrinsic_load_user_data_amd:
2476       return SYSTEM_VALUE_USER_DATA_AMD;
2477    case nir_intrinsic_load_barycentric_model:
2478       return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2479    case nir_intrinsic_load_gs_header_ir3:
2480       return SYSTEM_VALUE_GS_HEADER_IR3;
2481    case nir_intrinsic_load_tcs_header_ir3:
2482       return SYSTEM_VALUE_TCS_HEADER_IR3;
2483    case nir_intrinsic_load_ray_launch_id:
2484       return SYSTEM_VALUE_RAY_LAUNCH_ID;
2485    case nir_intrinsic_load_ray_launch_size:
2486       return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2487    case nir_intrinsic_load_ray_world_origin:
2488       return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2489    case nir_intrinsic_load_ray_world_direction:
2490       return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2491    case nir_intrinsic_load_ray_object_origin:
2492       return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2493    case nir_intrinsic_load_ray_object_direction:
2494       return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2495    case nir_intrinsic_load_ray_t_min:
2496       return SYSTEM_VALUE_RAY_T_MIN;
2497    case nir_intrinsic_load_ray_t_max:
2498       return SYSTEM_VALUE_RAY_T_MAX;
2499    case nir_intrinsic_load_ray_object_to_world:
2500       return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2501    case nir_intrinsic_load_ray_world_to_object:
2502       return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2503    case nir_intrinsic_load_ray_hit_kind:
2504       return SYSTEM_VALUE_RAY_HIT_KIND;
2505    case nir_intrinsic_load_ray_flags:
2506       return SYSTEM_VALUE_RAY_FLAGS;
2507    case nir_intrinsic_load_ray_geometry_index:
2508       return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2509    case nir_intrinsic_load_ray_instance_custom_index:
2510       return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2511    case nir_intrinsic_load_cull_mask:
2512       return SYSTEM_VALUE_CULL_MASK;
2513    case nir_intrinsic_load_ray_triangle_vertex_positions:
2514       return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2515    case nir_intrinsic_load_frag_shading_rate:
2516       return SYSTEM_VALUE_FRAG_SHADING_RATE;
2517    case nir_intrinsic_load_mesh_view_count:
2518       return SYSTEM_VALUE_MESH_VIEW_COUNT;
2519    case nir_intrinsic_load_fully_covered:
2520       return SYSTEM_VALUE_FULLY_COVERED;
2521    case nir_intrinsic_load_frag_size:
2522       return SYSTEM_VALUE_FRAG_SIZE;
2523    case nir_intrinsic_load_frag_invocation_count:
2524       return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2525    case nir_intrinsic_load_shader_index:
2526       return SYSTEM_VALUE_SHADER_INDEX;
2527    case nir_intrinsic_load_coalesced_input_count:
2528       return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2529    case nir_intrinsic_load_warps_per_sm_nv:
2530       return SYSTEM_VALUE_WARPS_PER_SM_NV;
2531    case nir_intrinsic_load_sm_count_nv:
2532       return SYSTEM_VALUE_SM_COUNT_NV;
2533    case nir_intrinsic_load_warp_id_nv:
2534       return SYSTEM_VALUE_WARP_ID_NV;
2535    case nir_intrinsic_load_sm_id_nv:
2536       return SYSTEM_VALUE_SM_ID_NV;
2537    default:
2538       unreachable("intrinsic doesn't produce a system value");
2539    }
2540 }
2541 
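/* Editor's note: for values present in both tables the two helpers are
 * inverses, e.g.
 *
 *    nir_system_value_from_intrinsic(
 *       nir_intrinsic_from_system_value(SYSTEM_VALUE_VERTEX_ID))
 *          == SYSTEM_VALUE_VERTEX_ID
 */
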
2542 /* OpenGL utility method that remaps the attribute locations if they are
2543  * doubles. Not needed for Vulkan due to the difference in input location
2544  * counts for doubles between Vulkan and OpenGL.
2545  *
2546  * The bitfield returned in dual_slot is one bit for each double input slot in
2547  * the original OpenGL single-slot input numbering.  The mapping from old
2548  * locations to new locations is as follows:
2549  *
2550  *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2551  */
2552 void
2553 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2554 {
2555    assert(shader->info.stage == MESA_SHADER_VERTEX);
2556 
2557    *dual_slot = 0;
2558    nir_foreach_shader_in_variable(var, shader) {
2559       if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2560          unsigned slots = glsl_count_attribute_slots(var->type, true);
2561          *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2562       }
2563    }
2564 
2565    nir_foreach_shader_in_variable(var, shader) {
2566       var->data.location +=
2567          util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2568    }
2569 }
2570 
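/* Worked example of the formula above (editor's note): if only location 1
 * holds a double, dual_slot == 0b10.  An attribute at old location 3 moves
 * to 3 + util_bitcount(0b10 & BITFIELD64_MASK(3)) = 3 + 1 = 4, while
 * locations 0 and 1 stay put.
 */
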
2571 /* Returns an attribute mask that has been re-compacted using the given
2572  * dual_slot mask.
2573  */
2574 uint64_t
2575 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2576 {
2577    while (dual_slot) {
2578       unsigned loc = u_bit_scan64(&dual_slot);
2579       /* mask of all bits up to and including loc */
2580       uint64_t mask = BITFIELD64_MASK(loc + 1);
2581       attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2582    }
2583    return attribs;
2584 }
2585 
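/* Worked example (editor's note): with attribs == 0b10110 and a dual_slot
 * bit at location 1, every attribs bit above location 1 shifts down one
 * slot, giving 0b01110.
 */
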
2586 void
2587 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2588                             bool bindless)
2589 {
2590    enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2591 
2592    /* Image intrinsics only have one of these */
2593    assert(!nir_intrinsic_has_src_type(intrin) ||
2594           !nir_intrinsic_has_dest_type(intrin));
2595 
2596    nir_alu_type data_type = nir_type_invalid;
2597    if (nir_intrinsic_has_src_type(intrin))
2598       data_type = nir_intrinsic_src_type(intrin);
2599    if (nir_intrinsic_has_dest_type(intrin))
2600       data_type = nir_intrinsic_dest_type(intrin);
2601 
2602    nir_atomic_op atomic_op = 0;
2603    if (nir_intrinsic_has_atomic_op(intrin))
2604       atomic_op = nir_intrinsic_atomic_op(intrin);
2605 
2606    switch (intrin->intrinsic) {
2607 #define CASE(op)                                                       \
2608    case nir_intrinsic_image_deref_##op:                                \
2609       intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2610                                    : nir_intrinsic_image_##op;         \
2611       break;
2612       CASE(load)
2613       CASE(sparse_load)
2614       CASE(store)
2615       CASE(atomic)
2616       CASE(atomic_swap)
2617       CASE(size)
2618       CASE(samples)
2619       CASE(load_raw_intel)
2620       CASE(store_raw_intel)
2621       CASE(fragment_mask_load_amd)
2622       CASE(store_block_agx)
2623 #undef CASE
2624    default:
2625       unreachable("Unhanded image intrinsic");
2626    }
2627 
2628    nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2629 
2630    /* Only update the format if the intrinsic doesn't have one set */
2631    if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2632       nir_intrinsic_set_format(intrin, var->data.image.format);
2633 
2634    nir_intrinsic_set_access(intrin, access | var->data.access);
2635    if (nir_intrinsic_has_src_type(intrin))
2636       nir_intrinsic_set_src_type(intrin, data_type);
2637    if (nir_intrinsic_has_dest_type(intrin))
2638       nir_intrinsic_set_dest_type(intrin, data_type);
2639 
2640    if (nir_intrinsic_has_atomic_op(intrin))
2641       nir_intrinsic_set_atomic_op(intrin, atomic_op);
2642 
2643    nir_src_rewrite(&intrin->src[0], src);
2644 }
2645 
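/* Editor's usage sketch: a deref-lowering pass that has computed the image
 * index or bindless handle in `index` (a placeholder) finishes with
 *
 *    nir_rewrite_image_intrinsic(intrin, index, use_bindless);
 *
 * after which src[0] holds `index` instead of the deref.
 */
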
2646 unsigned
2647 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2648 {
2649    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2650    int coords = glsl_get_sampler_dim_coordinate_components(dim);
2651    if (dim == GLSL_SAMPLER_DIM_CUBE)
2652       return coords;
2653    else
2654       return coords + nir_intrinsic_image_array(instr);
2655 }
2656 
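/* Editor's examples: GLSL_SAMPLER_DIM_2D with the array flag set yields
 * 2 + 1 = 3 coordinate components, while GLSL_SAMPLER_DIM_CUBE yields 3
 * whether or not it is arrayed.
 */
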
2657 nir_src *
2658 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2659 {
2660    switch (call->intrinsic) {
2661    case nir_intrinsic_trace_ray:
2662    case nir_intrinsic_rt_trace_ray:
2663       return &call->src[10];
2664    case nir_intrinsic_execute_callable:
2665    case nir_intrinsic_rt_execute_callable:
2666       return &call->src[1];
2667    default:
2668       unreachable("Not a call intrinsic");
2669       return NULL;
2670    }
2671 }
2672 
2673 nir_binding
2674 nir_chase_binding(nir_src rsrc)
2675 {
2676    nir_binding res = { 0 };
2677    if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2678       const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2679       bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2680       while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2681          nir_deref_instr *deref = nir_src_as_deref(rsrc);
2682 
2683          if (deref->deref_type == nir_deref_type_var) {
2684             res.success = true;
2685             res.var = deref->var;
2686             res.desc_set = deref->var->data.descriptor_set;
2687             res.binding = deref->var->data.binding;
2688             return res;
2689          } else if (deref->deref_type == nir_deref_type_array && is_image) {
2690             if (res.num_indices == ARRAY_SIZE(res.indices))
2691                return (nir_binding){ 0 };
2692             res.indices[res.num_indices++] = deref->arr.index;
2693          }
2694 
2695          rsrc = deref->parent;
2696       }
2697    }
2698 
2699    /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2700     * when removing the offset from addresses. We also consider
2701     * nir_op_is_vec_or_mov() instructions to skip trimming of
2702     * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2703     */
2704    unsigned num_components = nir_src_num_components(rsrc);
2705    while (true) {
2706       nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2707       nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2708       if (alu && alu->op == nir_op_mov) {
2709          for (unsigned i = 0; i < num_components; i++) {
2710             if (alu->src[0].swizzle[i] != i)
2711                return (nir_binding){ 0 };
2712          }
2713          rsrc = alu->src[0].src;
2714       } else if (alu && nir_op_is_vec(alu->op)) {
2715          for (unsigned i = 0; i < num_components; i++) {
2716             if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2717                return (nir_binding){ 0 };
2718          }
2719          rsrc = alu->src[0].src;
2720       } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2721          /* The caller might want to be aware if only the first invocation of
2722           * the indices are used.
2723           */
2724          res.read_first_invocation = true;
2725          rsrc = intrin->src[0];
2726       } else {
2727          break;
2728       }
2729    }
2730 
2731    if (nir_src_is_const(rsrc)) {
2732       /* GL binding model after deref lowering */
2733       res.success = true;
2734       /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2735        * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2736        * example) but others just keep it around as a vec2 (v3dv).
2737        */
2738       res.binding = nir_src_comp_as_uint(rsrc, 0);
2739       return res;
2740    }
2741 
2742    /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2743 
2744    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2745    if (!intrin)
2746       return (nir_binding){ 0 };
2747 
2748    /* Intel resource, similar to load_vulkan_descriptor after it has been
2749     * lowered.
2750     */
2751    if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2752       res.success = true;
2753       res.desc_set = nir_intrinsic_desc_set(intrin);
2754       res.binding = nir_intrinsic_binding(intrin);
2755       /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2756        * src[1]; it is kept around for other purposes.
2757        */
2758       res.num_indices = 2;
2759       res.indices[0] = intrin->src[0];
2760       res.indices[1] = intrin->src[1];
2761       return res;
2762    }
2763 
2764    /* skip load_vulkan_descriptor */
2765    if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2766       intrin = nir_src_as_intrinsic(intrin->src[0]);
2767       if (!intrin)
2768          return (nir_binding){ 0 };
2769    }
2770 
2771    if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2772       return (nir_binding){ 0 };
2773 
2774    assert(res.num_indices == 0);
2775    res.success = true;
2776    res.desc_set = nir_intrinsic_desc_set(intrin);
2777    res.binding = nir_intrinsic_binding(intrin);
2778    res.num_indices = 1;
2779    res.indices[0] = intrin->src[0];
2780    return res;
2781 }
2782 
2783 nir_variable *
2784 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2785 {
2786    nir_variable *binding_var = NULL;
2787    unsigned count = 0;
2788 
2789    if (!binding.success)
2790       return NULL;
2791 
2792    if (binding.var)
2793       return binding.var;
2794 
2795    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2796       if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2797          binding_var = var;
2798          count++;
2799       }
2800    }
2801 
2802    /* Be conservative if another variable is using the same binding/desc_set
2803     * because the access mask might be different and we can't get it reliably.
2804     */
2805    if (count > 1)
2806       return NULL;
2807 
2808    return binding_var;
2809 }
2810 
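/* Editor's usage sketch: chasing a descriptor access back to its
 * declaration.
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    if (binding.success) {
 *       nir_variable *var = nir_get_binding_variable(shader, binding);
 *       if (var) {
 *          // inspect var->data.access, var->type, ...
 *       }
 *    }
 */
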
2811 nir_scalar
2812 nir_scalar_chase_movs(nir_scalar s)
2813 {
2814    while (nir_scalar_is_alu(s)) {
2815       nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2816       if (alu->op == nir_op_mov) {
2817          s.def = alu->src[0].src.ssa;
2818          s.comp = alu->src[0].swizzle[s.comp];
2819       } else if (nir_op_is_vec(alu->op)) {
2820          s.def = alu->src[s.comp].src.ssa;
2821          s.comp = alu->src[s.comp].swizzle[0];
2822       } else {
2823          break;
2824       }
2825    }
2826 
2827    return s;
2828 }
2829 
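/* Editor's usage sketch: resolve which scalar actually feeds component 0
 * of `def`, looking through mov/vec copies.
 *
 *    nir_scalar s = nir_scalar_chase_movs(nir_get_scalar(def, 0));
 *    if (nir_scalar_is_const(s))
 *       ... s is a compile-time constant ...
 */
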
2830 nir_alu_type
2831 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2832 {
2833    switch (base_type) {
2834    /* clang-format off */
2835    case GLSL_TYPE_BOOL:    return nir_type_bool1;
2836    case GLSL_TYPE_UINT:    return nir_type_uint32;
2837    case GLSL_TYPE_INT:     return nir_type_int32;
2838    case GLSL_TYPE_UINT16:  return nir_type_uint16;
2839    case GLSL_TYPE_INT16:   return nir_type_int16;
2840    case GLSL_TYPE_UINT8:   return nir_type_uint8;
2841    case GLSL_TYPE_INT8:    return nir_type_int8;
2842    case GLSL_TYPE_UINT64:  return nir_type_uint64;
2843    case GLSL_TYPE_INT64:   return nir_type_int64;
2844    case GLSL_TYPE_FLOAT:   return nir_type_float32;
2845    case GLSL_TYPE_FLOAT16: return nir_type_float16;
2846    case GLSL_TYPE_DOUBLE:  return nir_type_float64;
2847       /* clang-format on */
2848 
2849    case GLSL_TYPE_COOPERATIVE_MATRIX:
2850    case GLSL_TYPE_SAMPLER:
2851    case GLSL_TYPE_TEXTURE:
2852    case GLSL_TYPE_IMAGE:
2853    case GLSL_TYPE_ATOMIC_UINT:
2854    case GLSL_TYPE_STRUCT:
2855    case GLSL_TYPE_INTERFACE:
2856    case GLSL_TYPE_ARRAY:
2857    case GLSL_TYPE_VOID:
2858    case GLSL_TYPE_SUBROUTINE:
2859    case GLSL_TYPE_ERROR:
2860       return nir_type_invalid;
2861    }
2862 
2863    unreachable("unknown type");
2864 }
2865 
2866 enum glsl_base_type
2867 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2868 {
2869    /* clang-format off */
2870    switch (base_type) {
2871    case nir_type_bool1:    return GLSL_TYPE_BOOL;
2872    case nir_type_uint32:   return GLSL_TYPE_UINT;
2873    case nir_type_int32:    return GLSL_TYPE_INT;
2874    case nir_type_uint16:   return GLSL_TYPE_UINT16;
2875    case nir_type_int16:    return GLSL_TYPE_INT16;
2876    case nir_type_uint8:    return GLSL_TYPE_UINT8;
2877    case nir_type_int8:     return GLSL_TYPE_INT8;
2878    case nir_type_uint64:   return GLSL_TYPE_UINT64;
2879    case nir_type_int64:    return GLSL_TYPE_INT64;
2880    case nir_type_float32:  return GLSL_TYPE_FLOAT;
2881    case nir_type_float16:  return GLSL_TYPE_FLOAT16;
2882    case nir_type_float64:  return GLSL_TYPE_DOUBLE;
2883    default: unreachable("Not a sized nir_alu_type");
2884    }
2885    /* clang-format on */
2886 }
2887 
2888 nir_op
2889 nir_op_vec(unsigned num_components)
2890 {
2891    /* clang-format off */
2892    switch (num_components) {
2893    case  1: return nir_op_mov;
2894    case  2: return nir_op_vec2;
2895    case  3: return nir_op_vec3;
2896    case  4: return nir_op_vec4;
2897    case  5: return nir_op_vec5;
2898    case  8: return nir_op_vec8;
2899    case 16: return nir_op_vec16;
2900    default: unreachable("bad component count");
2901    }
2902    /* clang-format on */
2903 }
2904 
2905 bool
2906 nir_op_is_vec(nir_op op)
2907 {
2908    switch (op) {
2909    case nir_op_vec2:
2910    case nir_op_vec3:
2911    case nir_op_vec4:
2912    case nir_op_vec5:
2913    case nir_op_vec8:
2914    case nir_op_vec16:
2915       return true;
2916    default:
2917       return false;
2918    }
2919 }
2920 
2921 nir_component_mask_t
2922 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
2923 {
2924    nir_component_mask_t read_mask = 0;
2925    for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
2926       if (!nir_alu_instr_channel_used(instr, src, c))
2927          continue;
2928 
2929       read_mask |= (1 << instr->src[src].swizzle[c]);
2930    }
2931    return read_mask;
2932 }
2933 
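/* Worked example (editor's note): a two-component ALU source swizzled .zx
 * uses channels 0 and 1, which read components 2 and 0, so the returned
 * mask is 0b0101.
 */
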
2934 unsigned
2935 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
2936 {
2937    if (nir_op_infos[instr->op].input_sizes[src] > 0)
2938       return nir_op_infos[instr->op].input_sizes[src];
2939 
2940    return instr->def.num_components;
2941 }
2942 
2943 #define CASE_ALL_SIZES(op) \
2944    case op:                \
2945    case op##8:             \
2946    case op##16:            \
2947    case op##32:
2948 
2949 bool
2950 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
2951 {
2952    switch (instr->op) {
2953       CASE_ALL_SIZES(nir_op_flt)
2954       CASE_ALL_SIZES(nir_op_fge)
2955       CASE_ALL_SIZES(nir_op_fltu)
2956       CASE_ALL_SIZES(nir_op_fgeu)
2957       CASE_ALL_SIZES(nir_op_feq)
2958       CASE_ALL_SIZES(nir_op_fneu)
2959       CASE_ALL_SIZES(nir_op_fequ)
2960       CASE_ALL_SIZES(nir_op_fneo)
2961       CASE_ALL_SIZES(nir_op_funord)
2962       CASE_ALL_SIZES(nir_op_ford)
2963       CASE_ALL_SIZES(nir_op_ilt)
2964       CASE_ALL_SIZES(nir_op_ult)
2965       CASE_ALL_SIZES(nir_op_ige)
2966       CASE_ALL_SIZES(nir_op_uge)
2967       CASE_ALL_SIZES(nir_op_ieq)
2968       CASE_ALL_SIZES(nir_op_ine)
2969       CASE_ALL_SIZES(nir_op_bitz)
2970       CASE_ALL_SIZES(nir_op_bitnz)
2971    case nir_op_inot:
2972       return true;
2973    default:
2974       return false;
2975    }
2976 }
2977 
2978 #undef CASE_ALL_SIZES
2979 
2980 unsigned
2981 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
2982 {
2983    const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2984    assert(srcn < info->num_srcs);
2985    if (info->src_components[srcn] > 0)
2986       return info->src_components[srcn];
2987    else if (info->src_components[srcn] == 0)
2988       return intr->num_components;
2989    else
2990       return nir_src_num_components(intr->src[srcn]);
2991 }
2992 
2993 unsigned
2994 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
2995 {
2996    const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2997    if (!info->has_dest)
2998       return 0;
2999    else if (info->dest_components)
3000       return info->dest_components;
3001    else
3002       return intr->num_components;
3003 }
3004 
3005 nir_alu_type
3006 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
3007 {
3008    /* We could go nuts here, but we'll just handle a few simple
3009     * cases and let everything else be untyped.
3010     */
3011    switch (intrin->intrinsic) {
3012    case nir_intrinsic_store_deref: {
3013       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3014       if (src == 1)
3015          return nir_get_nir_type_for_glsl_type(deref->type);
3016       break;
3017    }
3018 
3019    case nir_intrinsic_store_output:
3020       if (src == 0)
3021          return nir_intrinsic_src_type(intrin);
3022       break;
3023 
3024    default:
3025       break;
3026    }
3027 
3028    /* For the most part, we leave other intrinsics alone.  Most
3029     * of them don't matter in OpenGL ES 2.0 drivers anyway.
3030     * However, we should at least check if this is some sort of
3031     * IO intrinsic and flag its offset and index sources.
3032     */
3033    {
3034       int offset_src_idx = nir_get_io_offset_src_number(intrin);
3035       if (src == offset_src_idx) {
3036          const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
3037          if (offset_src)
3038             return nir_type_int;
3039       }
3040    }
3041 
3042    return nir_type_invalid;
3043 }
3044 
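/* A minimal usage sketch (instruction shapes illustrative): for a
 * store_deref, source 1 is the value being stored, so asking for its type
 * reports the type of the dereferenced variable, while source 0 (the
 * deref chain itself) falls through to untyped:
 *
 *    nir_intrinsic_instr_src_type(store, 1);  // from the deref's glsl_type
 *    nir_intrinsic_instr_src_type(store, 0);  // nir_type_invalid
 */
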
nir_alu_type
nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
{
   /* We could go nuts here, but we'll just handle a few simple
    * cases and let everything else be untyped.
    */
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      return nir_get_nir_type_for_glsl_type(deref->type);
   }

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
   case nir_intrinsic_load_uniform:
      return nir_intrinsic_dest_type(intrin);

   default:
      break;
   }

   return nir_type_invalid;
}

/**
 * Helper to copy const_index[] from src to dst, without assuming they
 * match in order.
 */
void
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
{
   if (src->intrinsic == dst->intrinsic) {
      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
      return;
   }

   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];

   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
      if (src_info->index_map[i] == 0)
         continue;

      /* require that dst instruction also uses the same const_index[]: */
      assert(dst_info->index_map[i] > 0);

      dst->const_index[dst_info->index_map[i] - 1] =
         src->const_index[src_info->index_map[i] - 1];
   }
}

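/* A hypothetical use, e.g. replacing one IO intrinsic with a different
 * opcode while preserving indices such as BASE, WRMASK, or IO_SEMANTICS:
 *
 *    nir_intrinsic_instr *new_store =
 *       nir_intrinsic_instr_create(shader, nir_intrinsic_store_per_vertex_output);
 *    nir_intrinsic_copy_const_indices(new_store, old_store);
 *
 * Every index the source opcode carries must also exist on the destination
 * opcode (enforced by the assert above), but the two may store it at
 * different positions within const_index[].
 */
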
bool
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
   case nir_texop_descriptor_amd:
      return false;
   default:
      return true;
   }
}

unsigned
nir_tex_instr_result_size(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs: {
      unsigned ret;
      switch (instr->sampler_dim) {
      case GLSL_SAMPLER_DIM_1D:
      case GLSL_SAMPLER_DIM_BUF:
         ret = 1;
         break;
      case GLSL_SAMPLER_DIM_2D:
      case GLSL_SAMPLER_DIM_CUBE:
      case GLSL_SAMPLER_DIM_MS:
      case GLSL_SAMPLER_DIM_RECT:
      case GLSL_SAMPLER_DIM_EXTERNAL:
      case GLSL_SAMPLER_DIM_SUBPASS:
      case GLSL_SAMPLER_DIM_SUBPASS_MS:
         ret = 2;
         break;
      case GLSL_SAMPLER_DIM_3D:
         ret = 3;
         break;
      default:
         unreachable("not reached");
      }
      if (instr->is_array)
         ret++;
      return ret;
   }

   case nir_texop_lod:
      return 2;

   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_lod_bias_agx:
   case nir_texop_has_custom_border_color_agx:
      return 1;

   case nir_texop_descriptor_amd:
      return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

   case nir_texop_sampler_descriptor_amd:
      return 4;

   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return 4;

   case nir_texop_custom_border_color_agx:
      return 4;

   default:
      if (instr->is_shadow && instr->is_new_style_shadow)
         return 1;

      return 4;
   }
}

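/* Two worked examples of the txs case: a 2D array texture returns
 * (width, height, layers), i.e. 2 + 1 components, while a buffer texture
 * returns just (width):
 *
 *    txs, GLSL_SAMPLER_DIM_2D, is_array  -> 3
 *    txs, GLSL_SAMPLER_DIM_BUF           -> 1
 */
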
bool
nir_tex_instr_is_query(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs:
   case nir_texop_lod:
   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_descriptor_amd:
   case nir_texop_sampler_descriptor_amd:
   case nir_texop_lod_bias_agx:
   case nir_texop_custom_border_color_agx:
   case nir_texop_has_custom_border_color_agx:
   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return true;
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txf_ms_fb:
   case nir_texop_txf_ms_mcs_intel:
   case nir_texop_tg4:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_fragment_fetch_amd:
      return false;
   default:
      unreachable("Invalid texture opcode");
   }
}

bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   case nir_texop_tg4:
      return instr->is_gather_implicit_lod;
   default:
      return false;
   }
}

nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_sampler_deref_intrinsic:
   case nir_tex_src_texture_deref_intrinsic:
   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
   if (instr->src[src].src_type == nir_tex_src_texture_handle ||
       instr->src[src].src_type == nir_tex_src_sampler_handle)
      return 0;

   return 1;
}

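/* For instance, sampling a 2D array texture uses coord_components == 3
 * (x, y, layer), but derivatives and offsets only apply to the 2D part,
 * so nir_tex_src_ddx/ddy and nir_tex_src_offset are reported as having
 * 2 components there.
 */
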
/**
 * Return which components are written into transform feedback buffers.
 * The result is relative to 0, not "component".
 */
unsigned
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
{
   unsigned mask = 0;

   if (nir_intrinsic_has_io_xfb(instr)) {
      unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
      assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */

      unsigned iter_mask = wr_mask;
      while (iter_mask) {
         unsigned i = u_bit_scan(&iter_mask);
         nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
         if (xfb.out[i % 2].num_components)
            mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
      }
   }

   return mask;
}

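/* A worked example (index values hypothetical): with COMPONENT == 1 and a
 * write mask of 0x3, wr_mask is 0b0110.  Bit 1 is described by
 * io_xfb.out[1] and bit 2 by io_xfb2.out[0]; if io_xfb.out[1] captures two
 * components and io_xfb2.out[0] captures none, the result is
 * BITFIELD_RANGE(1, 2) & 0b0110 == 0b0110.
 */
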
/**
 * Whether an output slot is consumed by fixed-function logic.
 */
bool
nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
{
   switch (next_shader) {
   case MESA_SHADER_FRAGMENT:
      return slot == VARYING_SLOT_POS ||
             slot == VARYING_SLOT_PSIZ ||
             slot == VARYING_SLOT_EDGE ||
             slot == VARYING_SLOT_CLIP_VERTEX ||
             slot == VARYING_SLOT_CLIP_DIST0 ||
             slot == VARYING_SLOT_CLIP_DIST1 ||
             slot == VARYING_SLOT_CULL_DIST0 ||
             slot == VARYING_SLOT_CULL_DIST1 ||
             slot == VARYING_SLOT_LAYER ||
             slot == VARYING_SLOT_VIEWPORT ||
             slot == VARYING_SLOT_VIEW_INDEX ||
             slot == VARYING_SLOT_VIEWPORT_MASK ||
             slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
             /* NV_mesh_shader only */
             slot == VARYING_SLOT_PRIMITIVE_COUNT ||
             slot == VARYING_SLOT_PRIMITIVE_INDICES;

   case MESA_SHADER_TESS_EVAL:
      return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
             slot == VARYING_SLOT_TESS_LEVEL_INNER ||
             slot == VARYING_SLOT_BOUNDING_BOX0 ||
             slot == VARYING_SLOT_BOUNDING_BOX1;

   case MESA_SHADER_MESH:
      /* NV_mesh_shader only */
      return slot == VARYING_SLOT_TASK_COUNT;

   case MESA_SHADER_NONE:
      /* NONE means unknown. Check all possibilities. */
      return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);

   default:
      /* No other shaders have preceding shaders with sysval outputs. */
      return false;
   }
}

/**
 * Whether an input/output slot is consumed by the next shader stage,
 * or written by the previous shader stage.
 */
bool
nir_slot_is_varying(gl_varying_slot slot)
{
   return slot >= VARYING_SLOT_VAR0 ||
          slot == VARYING_SLOT_COL0 ||
          slot == VARYING_SLOT_COL1 ||
          slot == VARYING_SLOT_BFC0 ||
          slot == VARYING_SLOT_BFC1 ||
          slot == VARYING_SLOT_FOGC ||
          (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
          slot == VARYING_SLOT_PNTC ||
          slot == VARYING_SLOT_CLIP_DIST0 ||
          slot == VARYING_SLOT_CLIP_DIST1 ||
          slot == VARYING_SLOT_CULL_DIST0 ||
          slot == VARYING_SLOT_CULL_DIST1 ||
          slot == VARYING_SLOT_PRIMITIVE_ID ||
          slot == VARYING_SLOT_LAYER ||
          slot == VARYING_SLOT_VIEWPORT ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          slot == VARYING_SLOT_TESS_LEVEL_INNER;
}

bool
nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
                                      gl_shader_stage next_shader)
{
   return nir_slot_is_sysval_output(slot, next_shader) &&
          nir_slot_is_varying(slot);
}

/**
 * This marks the output store instruction as not feeding the next shader
 * stage. If the instruction has no other use, it's removed.
 */
bool
nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_sysval_output &&
        nir_slot_is_sysval_output(sem.location, next_shader)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_varying = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}

/**
 * This marks the output store instruction as not feeding fixed-function
 * logic. If the instruction has no other use, it's removed.
 */
bool
nir_remove_sysval_output(nir_intrinsic_instr *intr)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_varying && nir_slot_is_varying(sem.location)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_sysval_output = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}

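/* A sketch of how a linker might use the two helpers above (loop shape
 * hypothetical): when the next stage reads nothing from a slot, drop the
 * varying half; a store that serves no other purpose and has no xfb
 * outputs is removed outright and the caller reports progress:
 *
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr_safe(instr, block) {
 *          ...
 *          if (slot_is_unused_by_next_stage)
 *             progress |= nir_remove_varying(intr, next_stage);
 *       }
 *    }
 */
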
void
nir_remove_non_entrypoints(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
}

void
nir_remove_non_exported(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_exported)
         exec_node_remove(&func->node);
   }
}

unsigned
nir_static_workgroup_size(const nir_shader *s)
{
   return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
          s->info.workgroup_size[2];
}

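/* E.g. a compute shader declared with local_size 8x8x1 reports a static
 * workgroup size of 8 * 8 * 1 = 64.  This is presumably only meaningful
 * when info.workgroup_size_variable is false.
 */
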
bool
nir_block_contains_work(nir_block *block)
{
   if (!nir_cf_node_is_last(&block->cf_node))
      return true;

   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_phi)
         continue;
      if (instr->type != nir_instr_type_alu ||
          !nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
         return true;
   }

   return false;
}