/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_phi_builder.h"
#include "nir_vla.h"

struct deref_node {
   struct deref_node *parent;
   const struct glsl_type *type;

   bool lower_to_ssa;

   /* Only valid for things that end up in the direct list.
    * Note that multiple nir_deref_instrs may correspond to this node, but
    * they will all be equivalent, so any is as good as the other.
    */
   nir_deref_path path;
   struct exec_node direct_derefs_link;

   struct set *loads;
   struct set *stores;
   struct set *copies;

   struct nir_phi_builder_value *pb_value;

   /* True if this node is fully direct.  If set, it must be in the children
    * array of its parent.
    */
   bool is_direct;

   /* Set on a root node for a variable to indicate that variable is used by a
    * cast or passed through some other sequence of instructions that are not
    * derefs.
    */
   bool has_complex_use;

   struct deref_node *wildcard;
   struct deref_node *indirect;
   struct deref_node *children[0];
};

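/* Sentinel deref_node for derefs that are statically out-of-bounds and thus
 * refer to nothing: loads through such a deref are replaced with undefs and
 * stores through it are simply removed.
 */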
#define UNDEF_NODE ((struct deref_node *)(uintptr_t)1)

struct lower_variables_state {
   nir_shader *shader;
   void *dead_ctx;
   nir_function_impl *impl;

   /* A hash table mapping variables to deref_node data */
   struct hash_table *deref_var_nodes;

   /* A list of deref_nodes for fully-qualified direct dereferences, i.e.
    * dereferences with no indirect or wildcard array dereferences.
    *
    * At the moment, we only lower loads, stores, and copies that can be
    * trivially lowered to loads and stores, i.e. copies with no indirects
    * and no wildcards.  If a part of a variable that is being loaded from
    * and/or stored into is also involved in a copy operation with
    * wildcards, then we lower that copy operation to loads and stores, but
    * otherwise we leave copies with wildcards alone. Since the only derefs
    * used in these loads, stores, and trivial copies are ones with no
    * wildcards and no indirects, these are precisely the derefs that we
    * can actually consider lowering.
    */
   struct exec_list direct_deref_nodes;

   /* Controls whether get_deref_node will add variables to the
    * direct_deref_nodes list.  This is turned on when we are initially
    * scanning for load/store instructions.  It is then turned off so we
    * don't accidentally change the direct_deref_nodes list while we're
    * iterating through it.
    */
   bool add_to_direct_deref_nodes;

   struct nir_phi_builder *phi_builder;
};

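/* Allocates and zeroes a deref_node of the given type, with one child slot
 * per struct member or array element (glsl_get_length(type)).
 */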
static struct deref_node *
deref_node_create(struct deref_node *parent,
                  const struct glsl_type *type,
                  bool is_direct, void *mem_ctx)
{
   size_t size = sizeof(struct deref_node) +
                 glsl_get_length(type) * sizeof(struct deref_node *);

   struct deref_node *node = rzalloc_size(mem_ctx, size);
   node->type = type;
   node->parent = parent;
   exec_node_init(&node->direct_derefs_link);
   node->is_direct = is_direct;

   return node;
}

/* Returns the deref node associated with the given variable.  This will be
 * the root of the tree representing all of the derefs of the given variable.
 */
static struct deref_node *
get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
{
   struct deref_node *node;

   struct hash_entry *var_entry =
      _mesa_hash_table_search(state->deref_var_nodes, var);

   if (var_entry) {
      return var_entry->data;
   } else {
      node = deref_node_create(NULL, var->type, true, state->dead_ctx);
      _mesa_hash_table_insert(state->deref_var_nodes, var, node);
      return node;
   }
}

/* Gets the deref_node for the given deref chain and creates it if it
 * doesn't yet exist.  If the deref is fully-qualified and direct and
 * state->add_to_direct_deref_nodes is true, it will be added to the list
 * of fully-qualified direct derefs.
 */
static struct deref_node *
get_deref_node_recur(nir_deref_instr *deref,
                     struct lower_variables_state *state)
{
   if (deref->deref_type == nir_deref_type_var)
      return get_deref_node_for_var(deref->var, state);

   if (deref->deref_type == nir_deref_type_cast)
      return NULL;

   struct deref_node *parent =
      get_deref_node_recur(nir_deref_instr_parent(deref), state);
   if (parent == NULL)
      return NULL;

   if (parent == UNDEF_NODE)
      return UNDEF_NODE;

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      assert(glsl_type_is_struct_or_ifc(parent->type));
      assert(deref->strct.index < glsl_get_length(parent->type));

      if (parent->children[deref->strct.index] == NULL) {
         parent->children[deref->strct.index] =
            deref_node_create(parent, deref->type, parent->is_direct,
                              state->dead_ctx);
      }

      return parent->children[deref->strct.index];

   case nir_deref_type_array: {
      if (glsl_type_is_vector_or_scalar(parent->type)) {
         /* For an array deref of a vector, return the vector */
         assert(glsl_type_is_vector(parent->type));
         return parent;
      } else if (nir_src_is_const(deref->arr.index)) {
         uint32_t index = nir_src_as_uint(deref->arr.index);
         /* This is possible if a loop unrolls and generates an
          * out-of-bounds offset.  We need to handle this at least
          * somewhat gracefully.
          */
         if (index >= glsl_get_length(parent->type))
            return UNDEF_NODE;

         if (parent->children[index] == NULL) {
            parent->children[index] =
               deref_node_create(parent, deref->type, parent->is_direct,
                                 state->dead_ctx);
         }

         return parent->children[index];
      } else {
         if (parent->indirect == NULL) {
            parent->indirect =
               deref_node_create(parent, deref->type, false, state->dead_ctx);
         }

         return parent->indirect;
      }
      break;
   }

   case nir_deref_type_array_wildcard:
      if (parent->wildcard == NULL) {
         parent->wildcard =
            deref_node_create(parent, deref->type, false, state->dead_ctx);
      }

      return parent->wildcard;

   default:
      unreachable("Invalid deref type");
   }
}

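/* Wrapper around get_deref_node_recur() that bails on anything this pass
 * can't handle (derefs that aren't definitely nir_var_function_temp and
 * cooperative-matrix types) and, while initially scanning, records
 * fully-direct derefs in state->direct_deref_nodes.
 */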
static struct deref_node *
get_deref_node(nir_deref_instr *deref, struct lower_variables_state *state)
{
   /* This pass only works on local variables.  Just ignore any derefs with
    * a non-local mode.
    */
   if (!nir_deref_mode_must_be(deref, nir_var_function_temp))
      return NULL;

   if (glsl_type_is_cmat(deref->type))
      return NULL;

   struct deref_node *node = get_deref_node_recur(deref, state);
   if (!node)
      return NULL;

   /* Insert the node in the direct derefs list.  We only do this if it's not
    * already in the list and we only bother for deref nodes which are used
    * directly in a load or store.
    */
   if (node != UNDEF_NODE && node->is_direct &&
       state->add_to_direct_deref_nodes &&
       node->direct_derefs_link.next == NULL) {
      nir_deref_path_init(&node->path, deref, state->dead_ctx);
      assert(deref->var != NULL);
      exec_list_push_tail(&state->direct_deref_nodes,
                          &node->direct_derefs_link);
   }

   return node;
}

/* \sa foreach_deref_node_match */
static void
foreach_deref_node_worker(struct deref_node *node, nir_deref_instr **path,
                          void (*cb)(struct deref_node *node,
                                     struct lower_variables_state *state),
                          struct lower_variables_state *state)
{
   if (glsl_type_is_vector_or_scalar(node->type)) {
      assert(*path == NULL || (*path)->deref_type == nir_deref_type_array);
      cb(node, state);
      return;
   }

   switch ((*path)->deref_type) {
   case nir_deref_type_struct:
      if (node->children[(*path)->strct.index]) {
         foreach_deref_node_worker(node->children[(*path)->strct.index],
                                   path + 1, cb, state);
      }
      return;

   case nir_deref_type_array: {
      if (glsl_type_is_vector_or_scalar(node->type))
         return;

      uint32_t index = nir_src_as_uint((*path)->arr.index);

      if (node->children[index]) {
         foreach_deref_node_worker(node->children[index],
                                   path + 1, cb, state);
      }

      if (node->wildcard) {
         foreach_deref_node_worker(node->wildcard,
                                   path + 1, cb, state);
      }
      return;
   }

   default:
      unreachable("Unsupported deref type");
   }
}

/* Walks over every "matching" deref_node and calls the callback.  A node
 * is considered to "match" if it either refers to that deref or matches up
 * to a wildcard.  In other words, the following would match a[6].foo[3].bar:
 *
 * a[6].foo[3].bar
 * a[*].foo[3].bar
 * a[6].foo[*].bar
 * a[*].foo[*].bar
 *
 * The given deref must be a full-length and fully qualified (no wildcards
 * or indirects) deref chain.
 */
static void
foreach_deref_node_match(nir_deref_path *path,
                         void (*cb)(struct deref_node *node,
                                    struct lower_variables_state *state),
                         struct lower_variables_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   struct deref_node *node = get_deref_node_for_var(path->path[0]->var, state);

   if (node == NULL)
      return;

   foreach_deref_node_worker(node, &path->path[1], cb, state);
}

/* \sa deref_may_be_aliased */
static bool
path_may_be_aliased_node(struct deref_node *node, nir_deref_instr **path,
                         struct lower_variables_state *state)
{
   if (*path == NULL)
      return false;

   switch ((*path)->deref_type) {
   case nir_deref_type_struct:
      if (node->children[(*path)->strct.index]) {
         return path_may_be_aliased_node(node->children[(*path)->strct.index],
                                         path + 1, state);
      } else {
         return false;
      }

   case nir_deref_type_array: {
      /* If the node is a vector, we consider it to not be aliased by any
       * indirects for the purposes of this pass.  We'll insert a pile of
       * bcsel if needed to resolve indirects.
       */
      if (glsl_type_is_vector_or_scalar(node->type))
         return false;

      if (!nir_src_is_const((*path)->arr.index))
         return true;

      uint32_t index = nir_src_as_uint((*path)->arr.index);

      /* If there is an indirect at this level, we're aliased. */
      if (node->indirect)
         return true;

      if (node->children[index] &&
          path_may_be_aliased_node(node->children[index],
                                   path + 1, state))
         return true;

      if (node->wildcard &&
          path_may_be_aliased_node(node->wildcard, path + 1, state))
         return true;

      return false;
   }

   default:
      unreachable("Unsupported deref type");
   }
}

/* Returns true if there are no indirects that can ever touch this deref.
 *
 * The one exception here is that we allow indirects which select components
 * of vectors.  These are handled by this pass by inserting the requisite
 * pile of bcsel().
 *
 * For example, if the given deref is a[6].foo, then any uses of a[i].foo
 * would cause this to return false, but a[i].bar would not affect it
 * because it's a different structure member.  A var_copy involving
 * a[*].bar also doesn't affect it because that can be lowered to entirely
 * direct load/stores.
 *
 * We only support asking this question about fully-qualified derefs.
 * Obviously, it's pointless to ask this about indirects, but we also
 * rule out wildcards.  Handling wildcard dereferences would involve
 * checking each array index to make sure that there aren't any indirect
 * references.
 */
static bool
path_may_be_aliased(nir_deref_path *path,
                    struct lower_variables_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   nir_variable *var = path->path[0]->var;
   struct deref_node *var_node = get_deref_node_for_var(var, state);

   /* First see if this variable is ever used by anything other than a
    * load/store.  If there's even so much as a cast in the way, we have to
    * assume aliasing and bail.
    */
   if (var_node->has_complex_use)
      return true;

   return path_may_be_aliased_node(var_node, &path->path[1], state);
}

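/* Marks the variable behind a var deref as having a use this pass can't
 * analyze (e.g. the deref feeds a cast or some non-load/store instruction),
 * which makes path_may_be_aliased() conservatively refuse to lower it.
 */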
static void
register_complex_use(nir_deref_instr *deref,
                     struct lower_variables_state *state)
{
   assert(deref->deref_type == nir_deref_type_var);
   struct deref_node *node = get_deref_node_for_var(deref->var, state);
   if (node == NULL)
      return;

   node->has_complex_use = true;
}

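/* Records a load_deref in the loads set of its deref_node.  Loads through
 * statically out-of-bounds derefs are instead replaced with undefs; returns
 * true in that case to report progress.
 */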
static bool
register_load_instr(nir_intrinsic_instr *load_instr,
                    struct lower_variables_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(load_instr->src[0]);
   struct deref_node *node = get_deref_node(deref, state);
   if (node == NULL)
      return false;

   /* Replace out-of-bounds load derefs with an undef, so that they don't get
    * left around when a driver has lowered all indirects and thus doesn't
    * expect any array derefs at all after vars_to_ssa.
    */
   if (node == UNDEF_NODE) {
      nir_undef_instr *undef =
         nir_undef_instr_create(state->shader,
                                load_instr->num_components,
                                load_instr->def.bit_size);

      nir_instr_insert_before(&load_instr->instr, &undef->instr);
      nir_instr_remove(&load_instr->instr);

      nir_def_rewrite_uses(&load_instr->def, &undef->def);
      return true;
   }

   if (node->loads == NULL)
      node->loads = _mesa_pointer_set_create(state->dead_ctx);

   _mesa_set_add(node->loads, load_instr);

   return false;
}

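/* Records a store_deref in the stores set of its deref_node.  Stores through
 * statically out-of-bounds derefs are simply removed; returns true in that
 * case to report progress.
 */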
static bool
register_store_instr(nir_intrinsic_instr *store_instr,
                     struct lower_variables_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(store_instr->src[0]);
   struct deref_node *node = get_deref_node(deref, state);

   /* Drop out-of-bounds store derefs, so that they don't get left around when a
    * driver has lowered all indirects and thus doesn't expect any array derefs
    * at all after vars_to_ssa.
    */
   if (node == UNDEF_NODE) {
      nir_instr_remove(&store_instr->instr);
      return true;
   }

   if (node == NULL)
      return false;

   if (node->stores == NULL)
      node->stores = _mesa_pointer_set_create(state->dead_ctx);

   _mesa_set_add(node->stores, store_instr);

   return false;
}

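/* Records a copy_deref in the copies set of the deref_node for each of its
 * two derefs (destination and source).
 */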
static void
register_copy_instr(nir_intrinsic_instr *copy_instr,
                    struct lower_variables_state *state)
{
   for (unsigned idx = 0; idx < 2; idx++) {
      nir_deref_instr *deref = nir_src_as_deref(copy_instr->src[idx]);
      struct deref_node *node = get_deref_node(deref, state);
      if (node == NULL || node == UNDEF_NODE)
         continue;

      if (node->copies == NULL)
         node->copies = _mesa_pointer_set_create(state->dead_ctx);

      _mesa_set_add(node->copies, copy_instr);
   }
}

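/* Walks every instruction in the impl and records where each local variable
 * deref is used in a load, store, or copy, flagging complex uses along the
 * way.  Returns true if any out-of-bounds loads or stores were rewritten or
 * removed.
 */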
static bool
register_variable_uses(nir_function_impl *impl,
                       struct lower_variables_state *state)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->deref_type == nir_deref_type_var &&
                nir_deref_instr_has_complex_use(deref, 0))
               register_complex_use(deref, state);

            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
               progress = register_load_instr(intrin, state) || progress;
               break;

            case nir_intrinsic_store_deref:
               progress = register_store_instr(intrin, state) || progress;
               break;

            case nir_intrinsic_copy_deref:
               register_copy_instr(intrin, state);
               break;

            default:
               continue;
            }
            break;
         }

         default:
            break;
         }
      }
   }
   return progress;
}

/* Walks over all of the copy instructions to or from the given deref_node
 * and lowers them to load/store intrinsics.
 */
static void
lower_copies_to_load_store(struct deref_node *node,
                           struct lower_variables_state *state)
{
   if (!node->copies)
      return;

   nir_builder b = nir_builder_create(state->impl);

   set_foreach(node->copies, copy_entry) {
      nir_intrinsic_instr *copy = (void *)copy_entry->key;

      nir_lower_deref_copy_instr(&b, copy);

      for (unsigned i = 0; i < 2; ++i) {
         nir_deref_instr *arg_deref = nir_src_as_deref(copy->src[i]);
         struct deref_node *arg_node = get_deref_node(arg_deref, state);

         /* Only bother removing copy entries for other nodes */
         if (arg_node == NULL || arg_node == node)
            continue;

         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
         assert(arg_entry);
         _mesa_set_remove(arg_node->copies, arg_entry);
      }

      nir_instr_remove(&copy->instr);
   }

   node->copies = NULL;
}

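/* If the deref selects a single component of a vector (an array deref whose
 * parent is a vector or scalar), returns that component index as an SSA
 * value; otherwise returns NULL and the deref refers to the whole value.
 */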
static nir_def *
deref_vec_component(nir_deref_instr *deref)
{
   if (deref->deref_type != nir_deref_type_array) {
      assert(glsl_type_is_vector_or_scalar(deref->type));
      return NULL;
   }

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (glsl_type_is_vector_or_scalar(parent->type)) {
      assert(glsl_type_is_scalar(deref->type));
      return deref->arr.index.ssa;
   } else {
      assert(glsl_type_is_vector_or_scalar(deref->type));
      return NULL;
   }
}

/* Performs variable renaming
 *
 * This algorithm is very similar to the one outlined in "Efficiently
 * Computing Static Single Assignment Form and the Control Dependence
 * Graph" by Cytron et al.  The primary difference is that we only put one
 * SSA def on the stack per block.
 */
static bool
rename_variables(struct lower_variables_state *state)
{
   nir_builder b = nir_builder_create(state->impl);

   nir_foreach_block(block, state->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_must_be(deref, nir_var_function_temp))
               continue;

            struct deref_node *node = get_deref_node(deref, state);
            if (node == NULL)
               continue;

            /* Should have been removed before rename_variables(). */
            assert(node != UNDEF_NODE);

            if (!node->lower_to_ssa)
               continue;

            nir_def *val =
               nir_phi_builder_value_get_block_def(node->pb_value, block);

            /* As tempting as it is to just rewrite the uses of our load
             * instruction with the value we got out of the phi builder, we
             * can't do that without risking messing ourselves up.  In
             * particular, the get_deref_node() function we call during
             * variable renaming uses nir_src_is_const() to determine which
             * deref node to fetch.  If we propagate directly, we may end up
             * propagating a constant into an array index, changing the
             * behavior of get_deref_node() for that deref and invalidating
             * our analysis.
             *
             * With enough work, we could probably make our analysis and data
             * structures robust against this but it would make everything
             * more complicated to reason about.  It's easier to just insert
             * a mov and let copy-prop clean up after us.  This pass is
             * complicated enough as-is.
             */
            b.cursor = nir_before_instr(&intrin->instr);
            val = nir_mov(&b, val);

            assert(val->bit_size == intrin->def.bit_size);

            nir_def *comp = deref_vec_component(deref);
            if (comp == NULL) {
               assert(val->num_components == intrin->def.num_components);
            } else {
               assert(intrin->def.num_components == 1);
               b.cursor = nir_before_instr(&intrin->instr);
               val = nir_vector_extract(&b, val, comp);
            }

            nir_def_replace(&intrin->def, val);
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_must_be(deref, nir_var_function_temp))
               continue;

            struct deref_node *node = get_deref_node(deref, state);
            if (node == NULL)
               continue;

            /* Should have been removed before rename_variables(). */
            assert(node != UNDEF_NODE);

            nir_def *value = intrin->src[1].ssa;

            if (!node->lower_to_ssa)
               continue;

            assert(intrin->num_components ==
                   glsl_get_vector_elements(deref->type));

            nir_def *new_def;
            b.cursor = nir_before_instr(&intrin->instr);

            nir_def *comp = deref_vec_component(deref);
            unsigned wrmask = nir_intrinsic_write_mask(intrin);
            if (comp != NULL) {
               assert(wrmask == 1 && intrin->num_components == 1);
               nir_def *old_def =
                  nir_phi_builder_value_get_block_def(node->pb_value, block);
               new_def = nir_vector_insert(&b, old_def, value, comp);
            } else if (wrmask == (1 << intrin->num_components) - 1) {
               /* Whole variable store - just copy the source.  Note that
                * intrin->num_components and value->num_components
                * may differ.
                */
               unsigned swiz[NIR_MAX_VEC_COMPONENTS];
               for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
                  swiz[i] = i < intrin->num_components ? i : 0;

               new_def = nir_swizzle(&b, value, swiz,
                                     intrin->num_components);
            } else {
               nir_def *old_def =
                  nir_phi_builder_value_get_block_def(node->pb_value, block);
               /* For writemasked store_var intrinsics, we combine the newly
                * written values with the existing contents of unwritten
                * channels, creating a new SSA value for the whole vector.
                */
               nir_scalar srcs[NIR_MAX_VEC_COMPONENTS];
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (wrmask & (1 << i)) {
                     srcs[i] = nir_get_scalar(value, i);
                  } else {
                     srcs[i] = nir_get_scalar(old_def, i);
                  }
               }
               new_def = nir_vec_scalars(&b, srcs, intrin->num_components);
            }

            nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
            nir_instr_remove(&intrin->instr);
            break;
         }

         default:
            break;
         }
      }
   }

   return true;
}

/** Implements a pass to lower variable uses to SSA values
 *
 * This pass walks the list of instructions and tries to lower as many
 * local variable load/store operations to SSA defs and uses as it can.
 * The process involves four passes:
 *
 *  1) Iterate over all of the instructions and mark where each local
 *     variable deref is used in a load, store, or copy.  While we're at
 *     it, we keep track of all of the fully-qualified (no wildcards) and
 *     fully-direct references we see and store them in the
 *     direct_deref_nodes list.
 *
 *  2) Walk over the list of fully-qualified direct derefs generated in
 *     the previous pass.  For each deref, we determine if it can ever be
 *     aliased, i.e. if there is an indirect reference anywhere that may
 *     refer to it.  If it cannot be aliased, we mark it for lowering to an
 *     SSA value.  At this point, we lower any var_copy instructions that
 *     use the given deref to load/store operations.
 *
 *  3) Walk over the list of derefs we plan to lower to SSA values and
 *     insert phi nodes as needed.
 *
 *  4) Perform "variable renaming" by replacing the load/store instructions
 *     with SSA definitions and SSA uses.
 */
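/* For example, a function-temporary vec4 that is only ever accessed through
 * direct load_deref/store_deref instructions ends up with each load replaced
 * by the SSA value stored on the corresponding path (with phis inserted at
 * join points), the stores removed, and the variable itself left dead.
 * (Illustrative summary; see the numbered passes above for the details.)
 */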
static bool
nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
{
   struct lower_variables_state state;

   state.shader = impl->function->shader;
   state.dead_ctx = ralloc_context(state.shader);
   state.impl = impl;

   state.deref_var_nodes = _mesa_pointer_hash_table_create(state.dead_ctx);
   exec_list_make_empty(&state.direct_deref_nodes);

   /* Build the initial deref structures and direct_deref_nodes list */
   state.add_to_direct_deref_nodes = true;

   bool progress = register_variable_uses(impl, &state);

   nir_metadata_require(impl, nir_metadata_block_index);

   /* We're about to iterate through direct_deref_nodes.  Don't modify it. */
   state.add_to_direct_deref_nodes = false;

   foreach_list_typed_safe(struct deref_node, node, direct_derefs_link,
                           &state.direct_deref_nodes) {
      nir_deref_path *path = &node->path;

      assert(path->path[0]->deref_type == nir_deref_type_var);

      /* We don't build deref nodes for non-local variables */
      assert(path->path[0]->var->data.mode == nir_var_function_temp);

      if (path_may_be_aliased(path, &state)) {
         exec_node_remove(&node->direct_derefs_link);
         continue;
      }

      node->lower_to_ssa = true;
      progress = true;

      foreach_deref_node_match(path, lower_copies_to_load_store, &state);
   }

   if (!progress) {
      nir_metadata_preserve(impl, nir_metadata_all);
      return false;
   }

   nir_metadata_require(impl, nir_metadata_dominance);

   /* We may have lowered some copy instructions to load/store
    * instructions.  The uses from the copy instructions have already been
    * removed but we need to rescan to ensure that the uses from the newly
    * added load/store instructions are registered.  We need this
    * information for phi node insertion below.
    */
   register_variable_uses(impl, &state);

   state.phi_builder = nir_phi_builder_create(state.impl);

   BITSET_WORD *store_blocks =
      ralloc_array(state.dead_ctx, BITSET_WORD,
                   BITSET_WORDS(state.impl->num_blocks));
   foreach_list_typed(struct deref_node, node, direct_derefs_link,
                      &state.direct_deref_nodes) {
      if (!node->lower_to_ssa)
         continue;

      memset(store_blocks, 0,
             BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks));

      assert(node->path.path[0]->var->constant_initializer == NULL &&
             node->path.path[0]->var->pointer_initializer == NULL);

      if (node->stores) {
         set_foreach(node->stores, store_entry) {
            nir_intrinsic_instr *store =
               (nir_intrinsic_instr *)store_entry->key;
            BITSET_SET(store_blocks, store->instr.block->index);
         }
      }

      node->pb_value =
         nir_phi_builder_add_value(state.phi_builder,
                                   glsl_get_vector_elements(node->type),
                                   glsl_get_bit_size(node->type),
                                   store_blocks);
   }

   rename_variables(&state);

   nir_phi_builder_finish(state.phi_builder);

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   ralloc_free(state.dead_ctx);

   return progress;
}

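/* Shader-level entry point: runs the lowering on every function
 * implementation in the shader.  Typically called from a driver's
 * optimization loop, e.g. NIR_PASS(progress, shader, nir_lower_vars_to_ssa).
 */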
bool
nir_lower_vars_to_ssa(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_vars_to_ssa_impl(impl);
   }

   return progress;
}