/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_phi_builder.h"
#include "nir_vla.h"

struct deref_node {
   struct deref_node *parent;
   const struct glsl_type *type;

   bool lower_to_ssa;

   /* Only valid for things that end up in the direct list.
    * Note that multiple nir_deref_instrs may correspond to this node, but
    * they will all be equivalent, so any is as good as the other.
    */
   nir_deref_path path;
   struct exec_node direct_derefs_link;

   struct set *loads;
   struct set *stores;
   struct set *copies;

   struct nir_phi_builder_value *pb_value;

   /* True if this node is fully direct. If set, it must be in the children
    * array of its parent.
    */
   bool is_direct;

   /* Set on a root node for a variable to indicate that variable is used by a
    * cast or passed through some other sequence of instructions that are not
    * derefs.
    */
   bool has_complex_use;

   struct deref_node *wildcard;
   struct deref_node *indirect;
   struct deref_node *children[0];
};

#define UNDEF_NODE ((struct deref_node *)(uintptr_t)1)

struct lower_variables_state {
   nir_shader *shader;
   void *dead_ctx;
   nir_function_impl *impl;

   /* A hash table mapping variables to deref_node data */
   struct hash_table *deref_var_nodes;

   /* A list of the deref_nodes for fully-qualified direct dereferences,
    * i.e. dereferences with no indirect or wildcard array dereferences.
    *
    * At the moment, we only lower loads, stores, and copies that can be
    * trivially lowered to loads and stores, i.e. copies with no indirects
    * and no wildcards. If a part of a variable that is being loaded from
    * and/or stored into is also involved in a copy operation with
    * wildcards, then we lower that copy operation to loads and stores, but
    * otherwise we leave copies with wildcards alone. Since the only derefs
    * used in these loads, stores, and trivial copies are ones with no
    * wildcards and no indirects, these are precisely the derefs that we
    * can actually consider lowering.
    */
   struct exec_list direct_deref_nodes;

   /* Controls whether get_deref_node will add variables to the
    * direct_deref_nodes list. This is turned on when we are initially
    * scanning for load/store instructions. It is then turned off so we
    * don't accidentally change the direct_deref_nodes list while we're
    * iterating through it.
    */
   bool add_to_direct_deref_nodes;

   struct nir_phi_builder *phi_builder;
};

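/* Allocates a deref_node of the given type, with trailing space in the
 * children[] flexible array for one child pointer per element or member
 * reported by glsl_get_length(type).
 */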
static struct deref_node *
deref_node_create(struct deref_node *parent,
                  const struct glsl_type *type,
                  bool is_direct, void *mem_ctx)
{
   size_t size = sizeof(struct deref_node) +
                 glsl_get_length(type) * sizeof(struct deref_node *);

   struct deref_node *node = rzalloc_size(mem_ctx, size);
   node->type = type;
   node->parent = parent;
   exec_node_init(&node->direct_derefs_link);
   node->is_direct = is_direct;

   return node;
}

/* Returns the deref node associated with the given variable. This will be
 * the root of the tree representing all of the derefs of the given variable.
 */
static struct deref_node *
get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
{
   struct deref_node *node;

   struct hash_entry *var_entry =
      _mesa_hash_table_search(state->deref_var_nodes, var);

   if (var_entry) {
      return var_entry->data;
   } else {
      node = deref_node_create(NULL, var->type, true, state->dead_ctx);
      _mesa_hash_table_insert(state->deref_var_nodes, var, node);
      return node;
   }
}

/* Gets the deref_node for the given deref chain and creates it if it
 * doesn't yet exist. If the deref is fully-qualified and direct and
 * state->add_to_direct_deref_nodes is true, it will be added to the list
 * of fully-qualified direct derefs.
 */
static struct deref_node *
get_deref_node_recur(nir_deref_instr *deref,
                     struct lower_variables_state *state)
{
   if (deref->deref_type == nir_deref_type_var)
      return get_deref_node_for_var(deref->var, state);

   if (deref->deref_type == nir_deref_type_cast)
      return NULL;

   struct deref_node *parent =
      get_deref_node_recur(nir_deref_instr_parent(deref), state);
   if (parent == NULL)
      return NULL;

   if (parent == UNDEF_NODE)
      return UNDEF_NODE;

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      assert(glsl_type_is_struct_or_ifc(parent->type));
      assert(deref->strct.index < glsl_get_length(parent->type));

      if (parent->children[deref->strct.index] == NULL) {
         parent->children[deref->strct.index] =
            deref_node_create(parent, deref->type, parent->is_direct,
                              state->dead_ctx);
      }

      return parent->children[deref->strct.index];

   case nir_deref_type_array: {
      if (glsl_type_is_vector_or_scalar(parent->type)) {
         /* For an array deref of a vector, return the vector */
         assert(glsl_type_is_vector(parent->type));
         return parent;
      } else if (nir_src_is_const(deref->arr.index)) {
         uint32_t index = nir_src_as_uint(deref->arr.index);
         /* This is possible if a loop unrolls and generates an
          * out-of-bounds offset. We need to handle this at least
          * somewhat gracefully.
          */
         if (index >= glsl_get_length(parent->type))
            return UNDEF_NODE;

         if (parent->children[index] == NULL) {
            parent->children[index] =
               deref_node_create(parent, deref->type, parent->is_direct,
                                 state->dead_ctx);
         }

         return parent->children[index];
      } else {
         if (parent->indirect == NULL) {
            parent->indirect =
               deref_node_create(parent, deref->type, false, state->dead_ctx);
         }

         return parent->indirect;
      }
      break;
   }

   case nir_deref_type_array_wildcard:
      if (parent->wildcard == NULL) {
         parent->wildcard =
            deref_node_create(parent, deref->type, false, state->dead_ctx);
      }

      return parent->wildcard;

   default:
      unreachable("Invalid deref type");
   }
}

static struct deref_node *
get_deref_node(nir_deref_instr *deref, struct lower_variables_state *state)
{
   /* This pass only works on local variables. Just ignore any derefs with
    * a non-local mode.
    */
   if (!nir_deref_mode_must_be(deref, nir_var_function_temp))
      return NULL;

   if (glsl_type_is_cmat(deref->type))
      return NULL;

   struct deref_node *node = get_deref_node_recur(deref, state);
   if (!node)
      return NULL;

   /* Insert the node in the direct derefs list. We only do this if it's not
    * already in the list and we only bother for deref nodes which are used
    * directly in a load or store.
    */
   if (node != UNDEF_NODE && node->is_direct &&
       state->add_to_direct_deref_nodes &&
       node->direct_derefs_link.next == NULL) {
      nir_deref_path_init(&node->path, deref, state->dead_ctx);
      assert(deref->var != NULL);
      exec_list_push_tail(&state->direct_deref_nodes,
                          &node->direct_derefs_link);
   }

   return node;
}

/* \sa foreach_deref_node_match */
static void
foreach_deref_node_worker(struct deref_node *node, nir_deref_instr **path,
                          void (*cb)(struct deref_node *node,
                                     struct lower_variables_state *state),
                          struct lower_variables_state *state)
{
   if (glsl_type_is_vector_or_scalar(node->type)) {
      assert(*path == NULL || (*path)->deref_type == nir_deref_type_array);
      cb(node, state);
      return;
   }

   switch ((*path)->deref_type) {
   case nir_deref_type_struct:
      if (node->children[(*path)->strct.index]) {
         foreach_deref_node_worker(node->children[(*path)->strct.index],
                                   path + 1, cb, state);
      }
      return;

   case nir_deref_type_array: {
      if (glsl_type_is_vector_or_scalar(node->type))
         return;

      uint32_t index = nir_src_as_uint((*path)->arr.index);

      if (node->children[index]) {
         foreach_deref_node_worker(node->children[index],
                                   path + 1, cb, state);
      }

      if (node->wildcard) {
         foreach_deref_node_worker(node->wildcard,
                                   path + 1, cb, state);
      }
      return;
   }

   default:
      unreachable("Unsupported deref type");
   }
}

/* Walks over every "matching" deref_node and calls the callback. A node
 * is considered to "match" if it either refers to that deref or matches up
 * to a wildcard. In other words, the following would match a[6].foo[3].bar:
 *
 *    a[6].foo[3].bar
 *    a[*].foo[3].bar
 *    a[6].foo[*].bar
 *    a[*].foo[*].bar
 *
 * The given deref must be a full-length and fully qualified (no wildcards
 * or indirects) deref chain.
 */
static void
foreach_deref_node_match(nir_deref_path *path,
                         void (*cb)(struct deref_node *node,
                                    struct lower_variables_state *state),
                         struct lower_variables_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   struct deref_node *node = get_deref_node_for_var(path->path[0]->var, state);

   if (node == NULL)
      return;

   foreach_deref_node_worker(node, &path->path[1], cb, state);
}

/* \sa deref_may_be_aliased */
static bool
path_may_be_aliased_node(struct deref_node *node, nir_deref_instr **path,
                         struct lower_variables_state *state)
{
   if (*path == NULL)
      return false;

   switch ((*path)->deref_type) {
   case nir_deref_type_struct:
      if (node->children[(*path)->strct.index]) {
         return path_may_be_aliased_node(node->children[(*path)->strct.index],
                                         path + 1, state);
      } else {
         return false;
      }

   case nir_deref_type_array: {
      /* If the node is a vector, we consider it to not be aliased by any
       * indirects for the purposes of this pass. We'll insert a pile of
       * bcsel if needed to resolve indirects.
       */
      if (glsl_type_is_vector_or_scalar(node->type))
         return false;

      if (!nir_src_is_const((*path)->arr.index))
         return true;

      uint32_t index = nir_src_as_uint((*path)->arr.index);

      /* If there is an indirect at this level, we're aliased. */
      if (node->indirect)
         return true;

      if (node->children[index] &&
          path_may_be_aliased_node(node->children[index],
                                   path + 1, state))
         return true;

      if (node->wildcard &&
          path_may_be_aliased_node(node->wildcard, path + 1, state))
         return true;

      return false;
   }

   default:
      unreachable("Unsupported deref type");
   }
}

/* Returns true if the given deref may be aliased, i.e. if some indirect
 * deref or complex (non-deref) use may touch the same storage.
 *
 * The one exception here is that we allow indirects which select components
 * of vectors; those don't count as aliasing because this pass handles them
 * by inserting the requisite pile of bcsel().
 *
 * For example, if the given deref is a[6].foo, then any uses of a[i].foo
 * would cause this to return true, but a[i].bar would not affect it
 * because it's a different structure member. A var_copy involving
 * a[*].bar also doesn't affect it because that can be lowered to entirely
 * direct load/stores.
 *
 * We only support asking this question about fully-qualified derefs.
 * Obviously, it's pointless to ask this about indirects, but we also
 * rule out wildcards. Handling wildcard dereferences would involve
 * checking each array index to make sure that there aren't any indirect
 * references.
 */
static bool
path_may_be_aliased(nir_deref_path *path,
                    struct lower_variables_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   nir_variable *var = path->path[0]->var;
   struct deref_node *var_node = get_deref_node_for_var(var, state);

   /* First see if this variable is ever used by anything other than a
    * load/store. If there's even so much as a cast in the way, we have to
    * assume aliasing and bail.
    */
   if (var_node->has_complex_use)
      return true;

   return path_may_be_aliased_node(var_node, &path->path[1], state);
}

static void
register_complex_use(nir_deref_instr *deref,
                     struct lower_variables_state *state)
{
   assert(deref->deref_type == nir_deref_type_var);
   struct deref_node *node = get_deref_node_for_var(deref->var, state);
   if (node == NULL)
      return;

   node->has_complex_use = true;
}

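/* Records a load_deref on its deref_node's set of loads. Loads through
 * out-of-bounds derefs (UNDEF_NODE) are replaced with an undef right away;
 * returns true if the load was removed that way.
 */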
static bool
register_load_instr(nir_intrinsic_instr *load_instr,
                    struct lower_variables_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(load_instr->src[0]);
   struct deref_node *node = get_deref_node(deref, state);
   if (node == NULL)
      return false;

   /* Replace out-of-bounds load derefs with an undef, so that they don't get
    * left around when a driver has lowered all indirects and thus doesn't
    * expect any array derefs at all after vars_to_ssa.
    */
   if (node == UNDEF_NODE) {
      nir_undef_instr *undef =
         nir_undef_instr_create(state->shader,
                                load_instr->num_components,
                                load_instr->def.bit_size);

      nir_instr_insert_before(&load_instr->instr, &undef->instr);
      nir_instr_remove(&load_instr->instr);

      nir_def_rewrite_uses(&load_instr->def, &undef->def);
      return true;
   }

   if (node->loads == NULL)
      node->loads = _mesa_pointer_set_create(state->dead_ctx);

   _mesa_set_add(node->loads, load_instr);

   return false;
}

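/* Records a store_deref on its deref_node's set of stores. Stores through
 * out-of-bounds derefs (UNDEF_NODE) are simply removed; returns true if the
 * store was removed.
 */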
static bool
register_store_instr(nir_intrinsic_instr *store_instr,
                     struct lower_variables_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(store_instr->src[0]);
   struct deref_node *node = get_deref_node(deref, state);

   /* Drop out-of-bounds store derefs, so that they don't get left around when a
    * driver has lowered all indirects and thus doesn't expect any array derefs
    * at all after vars_to_ssa.
    */
   if (node == UNDEF_NODE) {
      nir_instr_remove(&store_instr->instr);
      return true;
   }

   if (node == NULL)
      return false;

   if (node->stores == NULL)
      node->stores = _mesa_pointer_set_create(state->dead_ctx);

   _mesa_set_add(node->stores, store_instr);

   return false;
}

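/* Records a copy_deref on the copies set of both its destination (src[0])
 * and source (src[1]) deref_nodes.
 */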
static void
register_copy_instr(nir_intrinsic_instr *copy_instr,
                    struct lower_variables_state *state)
{
   for (unsigned idx = 0; idx < 2; idx++) {
      nir_deref_instr *deref = nir_src_as_deref(copy_instr->src[idx]);
      struct deref_node *node = get_deref_node(deref, state);
      if (node == NULL || node == UNDEF_NODE)
         continue;

      if (node->copies == NULL)
         node->copies = _mesa_pointer_set_create(state->dead_ctx);

      _mesa_set_add(node->copies, copy_instr);
   }
}

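/* Scans all instructions in the impl and records, per deref_node, every
 * load, store, copy, and complex (non-deref) use of a local variable.
 * Returns true if any out-of-bounds loads or stores were removed.
 */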
static bool
register_variable_uses(nir_function_impl *impl,
                       struct lower_variables_state *state)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->deref_type == nir_deref_type_var &&
                nir_deref_instr_has_complex_use(deref, 0))
               register_complex_use(deref, state);

            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
               progress = register_load_instr(intrin, state) || progress;
               break;

            case nir_intrinsic_store_deref:
               progress = register_store_instr(intrin, state) || progress;
               break;

            case nir_intrinsic_copy_deref:
               register_copy_instr(intrin, state);
               break;

            default:
               continue;
            }
            break;
         }

         default:
            break;
         }
      }
   }
   return progress;
}

/* Walks over all of the copy instructions to or from the given deref_node
 * and lowers them to load/store intrinsics.
 */
static void
lower_copies_to_load_store(struct deref_node *node,
                           struct lower_variables_state *state)
{
   if (!node->copies)
      return;

   nir_builder b = nir_builder_create(state->impl);

   set_foreach(node->copies, copy_entry) {
      nir_intrinsic_instr *copy = (void *)copy_entry->key;

      nir_lower_deref_copy_instr(&b, copy);

      for (unsigned i = 0; i < 2; ++i) {
         nir_deref_instr *arg_deref = nir_src_as_deref(copy->src[i]);
         struct deref_node *arg_node = get_deref_node(arg_deref, state);

         /* Only bother removing copy entries for other nodes */
         if (arg_node == NULL || arg_node == node)
            continue;

         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
         assert(arg_entry);
         _mesa_set_remove(arg_node->copies, arg_entry);
      }

      nir_instr_remove(&copy->instr);
   }

   node->copies = NULL;
}

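/* If the deref is an array deref selecting a single component of a vector,
 * returns the (possibly non-constant) component index; otherwise returns
 * NULL.
 */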
static nir_def *
deref_vec_component(nir_deref_instr *deref)
{
   if (deref->deref_type != nir_deref_type_array) {
      assert(glsl_type_is_vector_or_scalar(deref->type));
      return NULL;
   }

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (glsl_type_is_vector_or_scalar(parent->type)) {
      assert(glsl_type_is_scalar(deref->type));
      return deref->arr.index.ssa;
   } else {
      assert(glsl_type_is_vector_or_scalar(deref->type));
      return NULL;
   }
}

/* Performs variable renaming
 *
 * This algorithm is very similar to the one outlined in "Efficiently
 * Computing Static Single Assignment Form and the Control Dependence
 * Graph" by Cytron et al. The primary difference is that we only put one
 * SSA def on the stack per block.
 */
static bool
rename_variables(struct lower_variables_state *state)
{
   nir_builder b = nir_builder_create(state->impl);

   nir_foreach_block(block, state->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_must_be(deref, nir_var_function_temp))
               continue;

            struct deref_node *node = get_deref_node(deref, state);
            if (node == NULL)
               continue;

            /* Should have been removed before rename_variables(). */
            assert(node != UNDEF_NODE);

            if (!node->lower_to_ssa)
               continue;

            nir_def *val =
               nir_phi_builder_value_get_block_def(node->pb_value, block);

            /* As tempting as it is to just rewrite the uses of our load
             * instruction with the value we got out of the phi builder, we
             * can't do that without risking messing ourselves up. In
             * particular, the get_deref_node() function we call during
             * variable renaming uses nir_src_is_const() to determine which
             * deref node to fetch. If we propagate directly, we may end up
             * propagating a constant into an array index, changing the
             * behavior of get_deref_node() for that deref and invalidating
             * our analysis.
             *
             * With enough work, we could probably make our analysis and data
             * structures robust against this but it would make everything
             * more complicated to reason about. It's easier to just insert
             * a mov and let copy-prop clean up after us. This pass is
             * complicated enough as-is.
             */
            b.cursor = nir_before_instr(&intrin->instr);
            val = nir_mov(&b, val);

            assert(val->bit_size == intrin->def.bit_size);

            nir_def *comp = deref_vec_component(deref);
            if (comp == NULL) {
               assert(val->num_components == intrin->def.num_components);
            } else {
               assert(intrin->def.num_components == 1);
               b.cursor = nir_before_instr(&intrin->instr);
               val = nir_vector_extract(&b, val, comp);
            }

            nir_def_replace(&intrin->def, val);
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_must_be(deref, nir_var_function_temp))
               continue;

            struct deref_node *node = get_deref_node(deref, state);
            if (node == NULL)
               continue;

            /* Should have been removed before rename_variables(). */
            assert(node != UNDEF_NODE);

            nir_def *value = intrin->src[1].ssa;

            if (!node->lower_to_ssa)
               continue;

            assert(intrin->num_components ==
                   glsl_get_vector_elements(deref->type));

            nir_def *new_def;
            b.cursor = nir_before_instr(&intrin->instr);

            nir_def *comp = deref_vec_component(deref);
            unsigned wrmask = nir_intrinsic_write_mask(intrin);
            if (comp != NULL) {
               assert(wrmask == 1 && intrin->num_components == 1);
               nir_def *old_def =
                  nir_phi_builder_value_get_block_def(node->pb_value, block);
               new_def = nir_vector_insert(&b, old_def, value, comp);
            } else if (wrmask == (1 << intrin->num_components) - 1) {
               /* Whole variable store - just copy the source. Note that
                * intrin->num_components and value->num_components
                * may differ.
                */
               unsigned swiz[NIR_MAX_VEC_COMPONENTS];
               for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
                  swiz[i] = i < intrin->num_components ? i : 0;

               new_def = nir_swizzle(&b, value, swiz,
                                     intrin->num_components);
            } else {
               nir_def *old_def =
                  nir_phi_builder_value_get_block_def(node->pb_value, block);
               /* For writemasked store_var intrinsics, we combine the newly
                * written values with the existing contents of unwritten
                * channels, creating a new SSA value for the whole vector.
                */
               nir_scalar srcs[NIR_MAX_VEC_COMPONENTS];
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (wrmask & (1 << i)) {
                     srcs[i] = nir_get_scalar(value, i);
                  } else {
                     srcs[i] = nir_get_scalar(old_def, i);
                  }
               }
               new_def = nir_vec_scalars(&b, srcs, intrin->num_components);
            }

            nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
            nir_instr_remove(&intrin->instr);
            break;
         }

         default:
            break;
         }
      }
   }

   return true;
}

/** Implements a pass to lower variable uses to SSA values
 *
 * This pass walks the list of instructions and tries to lower as many
 * local variable load/store operations to SSA defs and uses as it can.
 * The process involves four passes:
 *
 * 1) Iterate over all of the instructions and mark where each local
 *    variable deref is used in a load, store, or copy. While we're at
 *    it, we keep track of all of the fully-qualified (no wildcards) and
 *    fully-direct references we see and store them in the
 *    direct_deref_nodes list.
 *
 * 2) Walk over the list of fully-qualified direct derefs generated in
 *    the previous pass. For each deref, we determine if it can ever be
 *    aliased, i.e. if there is an indirect reference anywhere that may
 *    refer to it. If it cannot be aliased, we mark it for lowering to an
 *    SSA value. At this point, we lower any var_copy instructions that
 *    use the given deref to load/store operations.
 *
 * 3) Walk over the list of derefs we plan to lower to SSA values and
 *    insert phi nodes as needed.
 *
 * 4) Perform "variable renaming" by replacing the load/store instructions
 *    with SSA definitions and SSA uses.
 */
static bool
nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
{
   struct lower_variables_state state;

   state.shader = impl->function->shader;
   state.dead_ctx = ralloc_context(state.shader);
   state.impl = impl;

   state.deref_var_nodes = _mesa_pointer_hash_table_create(state.dead_ctx);
   exec_list_make_empty(&state.direct_deref_nodes);

   /* Build the initial deref structures and direct_deref_nodes list */
   state.add_to_direct_deref_nodes = true;

   bool progress = register_variable_uses(impl, &state);

   nir_metadata_require(impl, nir_metadata_block_index);

   /* We're about to iterate through direct_deref_nodes. Don't modify it. */
   state.add_to_direct_deref_nodes = false;

   foreach_list_typed_safe(struct deref_node, node, direct_derefs_link,
                           &state.direct_deref_nodes) {
      nir_deref_path *path = &node->path;

      assert(path->path[0]->deref_type == nir_deref_type_var);

      /* We don't build deref nodes for non-local variables */
      assert(path->path[0]->var->data.mode == nir_var_function_temp);

      if (path_may_be_aliased(path, &state)) {
         exec_node_remove(&node->direct_derefs_link);
         continue;
      }

      node->lower_to_ssa = true;
      progress = true;

      foreach_deref_node_match(path, lower_copies_to_load_store, &state);
   }

   if (!progress) {
      nir_metadata_preserve(impl, nir_metadata_all);
      return false;
   }

   nir_metadata_require(impl, nir_metadata_dominance);

   /* We may have lowered some copy instructions to load/store
    * instructions. The uses from the copy instructions have already been
    * removed but we need to rescan to ensure that the uses from the newly
    * added load/store instructions are registered. We need this
    * information for phi node insertion below.
    */
   register_variable_uses(impl, &state);

   state.phi_builder = nir_phi_builder_create(state.impl);

   /* Tell the phi builder, for each value we plan to lower, which blocks
    * contain a store to it so it can place phi nodes where needed.
    */
   BITSET_WORD *store_blocks =
      ralloc_array(state.dead_ctx, BITSET_WORD,
                   BITSET_WORDS(state.impl->num_blocks));
   foreach_list_typed(struct deref_node, node, direct_derefs_link,
                      &state.direct_deref_nodes) {
      if (!node->lower_to_ssa)
         continue;

      memset(store_blocks, 0,
             BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks));

      assert(node->path.path[0]->var->constant_initializer == NULL &&
             node->path.path[0]->var->pointer_initializer == NULL);

      if (node->stores) {
         set_foreach(node->stores, store_entry) {
            nir_intrinsic_instr *store =
               (nir_intrinsic_instr *)store_entry->key;
            BITSET_SET(store_blocks, store->instr.block->index);
         }
      }

      node->pb_value =
         nir_phi_builder_add_value(state.phi_builder,
                                   glsl_get_vector_elements(node->type),
                                   glsl_get_bit_size(node->type),
                                   store_blocks);
   }

   rename_variables(&state);

   nir_phi_builder_finish(state.phi_builder);

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   ralloc_free(state.dead_ctx);

   return progress;
}

bool
nir_lower_vars_to_ssa(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_vars_to_ssa_impl(impl);
   }

   return progress;
}