xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_trivialize_registers.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "util/bitscan.h"
7 #include "util/hash_table.h"
8 #include "util/list.h"
9 #include "util/ralloc.h"
10 #include "nir.h"
11 #include "nir_builder.h"
12 #include "nir_builder_opcodes.h"
13 #include "nir_intrinsics_indices.h"
14 
15 /*
16  * If we have NIR like
17  *
18  *    x = load_reg reg
19  *    use(x)
20  *
21  * we can translate to a single instruction use(reg) in one step by inspecting
22  * the parent instruction of x, which is convenient for instruction selection
23  * that historically used registers.
24  *
25  * However, if we have an intervening store
26  *
27  *    x = load_reg reg
28  *    store_reg reg, y
29  *    use(x)
30  *
31  * we are no longer able to translate to use(reg), since reg has been
32  * overwritten. We could detect the write-after-read hazard at instruction
33  * selection time, but that requires an O(n) walk of instructions for each
34  * register source read, leading to quadratic compile time. Instead, we ensure
35  * this hazard does not happen and then use the simple O(1) translation.
36  *
37  * We say that a load_register is "trivial" if:
38  *
39  *  1. every use is in the same block as the load_reg
40  *
41  *  2. there is no intervening store_register (write-after-read) between the
42  *     load and the use.
43  *
44  * Similarly, a store_register is trivial if:
45  *
46  * 1. the value stored has exactly one use (the store)
47  *
48  * 2. the value is written in the same block as the store
49  *
50  * 3. the live range of the components of the value being stored does not
51  *    overlap the live range of the destination of any load_reg or the data
52  *    source components of any store_reg operating on that same register.  The
53  *    live ranges of the data portions of two store_reg intrinsics may overlap
54  *    if they have non-intersecting write masks.
55  *
56  * 4. the producer is not a load_const or ssa_undef (as these historically
57  *    could not write to registers so backends are expecting SSA here), or a
58  *    load_reg (since backends need a move to copy between registers)
59  *
60  * 5. if indirect, the indirect index is live at the producer.
61  *
62  * This pass inserts copies to ensure that all load_reg/store_reg are trivial.
63  */
64 
65 /*
66  * In order to allow for greater freedom elsewhere in the pass, move all
67  * reg_decl intrinsics to the top of their block.  This ensures in particular
68  * that decl_reg intrinsics occur before the producer of the SSA value
69  * consumed by a store_reg whenever they're all in the same block.
70  */
71 static void
move_reg_decls(nir_block * block)72 move_reg_decls(nir_block *block)
73 {
74    nir_cursor cursor = nir_before_block(block);
75 
76    nir_foreach_instr_safe(instr, block) {
77       if (instr->type != nir_instr_type_intrinsic)
78          continue;
79 
80       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
81       if (intr->intrinsic != nir_intrinsic_decl_reg)
82          continue;
83 
84       nir_instr_move(cursor, instr);
85       cursor = nir_after_instr(instr);
86    }
87 }
88 
89 /*
90  * Any load can be trivialized by copying immediately after the load and then
91  * rewriting uses of the load to read from the copy. That has no functional
92  * change, but it means that for every use of the load (the copy), there is no
93  * intervening instruction and in particular no intervening store on any control
94  * flow path. Therefore the load is trivial.
95  */
96 static void
trivialize_load(nir_intrinsic_instr * load)97 trivialize_load(nir_intrinsic_instr *load)
98 {
99    assert(nir_is_load_reg(load));
100 
101    nir_builder b = nir_builder_at(nir_after_instr(&load->instr));
102    nir_def *copy = nir_mov(&b, &load->def);
103    copy->divergent = load->def.divergent;
104    nir_def_rewrite_uses_after(&load->def, copy, copy->parent_instr);
105 
106    assert(list_is_singular(&load->def.uses));
107 }
108 
struct trivialize_src_state {
   /* The block currently being processed; loads used from a different block
    * are always nontrivial (see trivialize_src).
    */
   nir_block *block;
   /* Bitset indexed by SSA def index.  A set bit means the corresponding
    * load_reg def is currently trivial (no store to its register has been
    * seen since the load).
    */
   BITSET_WORD *trivial_loads;
};
113 
114 static bool
trivialize_src(nir_src * src,void * state_)115 trivialize_src(nir_src *src, void *state_)
116 {
117    struct trivialize_src_state *state = state_;
118 
119    nir_instr *parent = src->ssa->parent_instr;
120    if (parent->type != nir_instr_type_intrinsic)
121       return true;
122 
123    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
124    if (!nir_is_load_reg(intr))
125       return true;
126 
127    if (intr->instr.block != state->block ||
128        !BITSET_TEST(state->trivial_loads, intr->def.index))
129       trivialize_load(intr);
130 
131    return true;
132 }
133 
134 static void
trivialize_loads(nir_function_impl * impl,nir_block * block)135 trivialize_loads(nir_function_impl *impl, nir_block *block)
136 {
137    struct trivialize_src_state state = {
138       .block = block,
139       .trivial_loads = calloc(BITSET_WORDS(impl->ssa_alloc),
140                               sizeof(BITSET_WORD)),
141    };
142 
143    nir_foreach_instr_safe(instr, block) {
144       /* Our cross-block serialization can't handle phis */
145       assert(instr->type != nir_instr_type_phi);
146 
147       nir_foreach_src(instr, trivialize_src, &state);
148 
149       /* We maintain a set of register loads which can be accessed trivially.
150        * When we hit a load, it is added to the trivial set. When the register
151        * is stored, all loads from the register become nontrivial. That means
152        * the window between the load and the store is where the register can be
153        * accessed legally.
154        *
155        * Note that we must track loads and not registers to correctly handle
156        * cases like:
157        *
158        *    %1 = @load_reg %0
159        *    %2 = @load_reg %0
160        *    @store_reg data, %0
161        *    use %1
162        *
163        * This is pretty obscure but it isn't a big deal to handle.
164        */
165       if (instr->type == nir_instr_type_intrinsic) {
166          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
167 
168          /* We don't consider indirect loads to ever be trivial */
169          if (intr->intrinsic == nir_intrinsic_load_reg_indirect) {
170             trivialize_load(intr);
171          } else if (intr->intrinsic == nir_intrinsic_load_reg) {
172             BITSET_SET(state.trivial_loads, intr->def.index);
173          } else if (nir_is_store_reg(intr)) {
174             nir_intrinsic_instr *reg = nir_reg_get_decl(intr->src[1].ssa);
175 
176             nir_foreach_reg_load(load, reg) {
177                nir_instr *parent = nir_src_parent_instr(load);
178                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
179 
180                BITSET_CLEAR(state.trivial_loads, intr->def.index);
181             }
182          }
183       }
184    }
185 
186    /* Also check the condition of the next if */
187    nir_if *nif = nir_block_get_following_if(block);
188    if (nif)
189       trivialize_src(&nif->condition, &state);
190 
191    free(state.trivial_loads);
192 }
193 
194 /*
195  * Any store can be made trivial by inserting a copy of the value immediately
196  * before the store and reading from the copy instead. Proof:
197  *
198  * 1. The new value stored (the copy result) is used exactly once.
199  *
200  * 2. No intervening instructions between the copy and the store.
201  *
202  * 3. The copy is ALU, not load_const or ssa_undef.
203  *
204  * 4. The indirect index must be live at the store, which means it is also
205  * live at the copy inserted immediately before the store (same live-in set),
206  * so it is live at the new producer (the copy).
207  */
208 static void
isolate_store(nir_intrinsic_instr * store)209 isolate_store(nir_intrinsic_instr *store)
210 {
211    assert(nir_is_store_reg(store));
212 
213    nir_builder b = nir_builder_at(nir_before_instr(&store->instr));
214    nir_def *copy = nir_mov(&b, store->src[0].ssa);
215    copy->divergent = store->src[0].ssa->divergent;
216    nir_src_rewrite(&store->src[0], copy);
217 }
218 
219 static void
clear_store(nir_intrinsic_instr * store,unsigned num_reg_components,nir_intrinsic_instr ** reg_stores)220 clear_store(nir_intrinsic_instr *store,
221             unsigned num_reg_components,
222             nir_intrinsic_instr **reg_stores)
223 {
224    nir_component_mask_t mask = nir_intrinsic_write_mask(store);
225    u_foreach_bit(c, mask) {
226       assert(c < num_reg_components);
227       assert(reg_stores[c] == store);
228       reg_stores[c] = NULL;
229    }
230 }
231 
232 static void
clear_reg_stores(nir_def * reg,struct hash_table * possibly_trivial_stores)233 clear_reg_stores(nir_def *reg,
234                  struct hash_table *possibly_trivial_stores)
235 {
236    /* At any given point in store trivialize pass, every store in the current
237     * block is either trivial or in the possibly_trivial_stores map.
238     */
239    struct hash_entry *entry =
240       _mesa_hash_table_search(possibly_trivial_stores, reg);
241    if (entry == NULL)
242       return;
243 
244    nir_intrinsic_instr **stores = entry->data;
245    nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
246    unsigned num_components = nir_intrinsic_num_components(decl);
247 
248    for (unsigned c = 0; c < num_components; c++) {
249       if (stores[c] == NULL)
250          continue;
251 
252       clear_store(stores[c], num_components, stores);
253    }
254 }
255 
256 static void
trivialize_store(nir_intrinsic_instr * store,struct hash_table * possibly_trivial_stores)257 trivialize_store(nir_intrinsic_instr *store,
258                  struct hash_table *possibly_trivial_stores)
259 {
260    nir_def *reg = store->src[1].ssa;
261 
262    /* At any given point in store trivialize pass, every store in the current
263     * block is either trivial or in the possibly_trivial_stores map.
264     */
265    struct hash_entry *entry =
266       _mesa_hash_table_search(possibly_trivial_stores, reg);
267    if (entry == NULL)
268       return;
269 
270    nir_intrinsic_instr **stores = entry->data;
271    nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
272    unsigned num_components = nir_intrinsic_num_components(decl);
273 
274    nir_component_mask_t found = 0;
275    for (unsigned c = 0; c < num_components; c++) {
276       if (stores[c] == store)
277          found |= BITFIELD_BIT(c);
278    }
279    if (!found)
280       return;
281 
282    /* A store can't be only partially trivial */
283    assert(found == nir_intrinsic_write_mask(store));
284 
285    isolate_store(store);
286    clear_store(store, num_components, stores);
287 }
288 
289 static void
trivialize_reg_stores(nir_def * reg,nir_component_mask_t mask,struct hash_table * possibly_trivial_stores)290 trivialize_reg_stores(nir_def *reg, nir_component_mask_t mask,
291                       struct hash_table *possibly_trivial_stores)
292 {
293    /* At any given point in store trivialize pass, every store in the current
294     * block is either trivial or in the possibly_trivial_stores map.
295     */
296    struct hash_entry *entry =
297       _mesa_hash_table_search(possibly_trivial_stores, reg);
298    if (entry == NULL)
299       return;
300 
301    nir_intrinsic_instr **stores = entry->data;
302    nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
303    unsigned num_components = nir_intrinsic_num_components(decl);
304 
305    u_foreach_bit(c, mask) {
306       assert(c < num_components);
307       if (stores[c] == NULL)
308          continue;
309 
310       isolate_store(stores[c]);
311       clear_store(stores[c], num_components, stores);
312    }
313 }
314 
315 /*
316  * Trivialize for read-after-write hazards, given a load that is between the def
317  * and store.
318  */
319 static void
trivialize_read_after_write(nir_intrinsic_instr * load,struct hash_table * possibly_trivial_stores)320 trivialize_read_after_write(nir_intrinsic_instr *load,
321                             struct hash_table *possibly_trivial_stores)
322 {
323    assert(nir_is_load_reg(load));
324 
325    unsigned nr = load->def.num_components;
326    trivialize_reg_stores(load->src[0].ssa, nir_component_mask(nr),
327                          possibly_trivial_stores);
328 }
329 
/* nir_foreach_def callback for the backwards walk in trivialize_stores.
 * When we reach the producer of a value, decide the fate of any same-block
 * store_reg that uses it: either it is trivial (remove it from the tracking
 * set) or it must be isolated now to keep the value's producer dominating
 * its single use.
 */
static bool
clear_def(nir_def *def, void *state)
{
   struct hash_table *possibly_trivial_stores = state;

   nir_foreach_use(src, def) {
      /* If-condition uses are not store_reg sources. */
      if (nir_src_is_if(src))
         continue;

      nir_instr *parent = nir_src_parent_instr(src);
      if (parent->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *store = nir_instr_as_intrinsic(parent);
      if (!nir_is_store_reg(store))
         continue;

      /* Anything global has already been trivialized and can be ignored */
      if (parent->block != def->parent_instr->block)
         continue;

      if (def == store->src[0].ssa) {
         /* We encountered a value which is written by a store_reg so, if this
          * store is still in possibly_trivial_stores, it is trivial and we
          * can remove it from the set.
          */
         assert(list_is_singular(&def->uses));
         clear_reg_stores(store->src[1].ssa, possibly_trivial_stores);
      } else {
         /* We encountered either the indirect index or the decl_reg (unlikely)
          * before the value while iterating backwards.  Trivialize the store
          * now to maintain dominance.
          */
         trivialize_store(store, possibly_trivial_stores);
      }
   }

   return false;
}
369 
370 /*
371  * If a load_reg will be folded into this instruction, we need to handle
372  * read-after-write hazards, the same as if we hit a load_reg directly.
373  */
374 static bool
trivialize_source(nir_src * src,void * state)375 trivialize_source(nir_src *src, void *state)
376 {
377    struct hash_table *possibly_trivial_stores = state;
378 
379    nir_intrinsic_instr *load_reg = nir_load_reg_for_def(src->ssa);
380    if (load_reg)
381       trivialize_read_after_write(load_reg, possibly_trivial_stores);
382 
383    return true;
384 }
385 
/* Walk the block backwards and ensure every store_reg is trivial, inserting
 * isolating copies where it is not.  NOTE: impl is currently unused here; it
 * is kept for signature symmetry with trivialize_loads.
 */
static void
trivialize_stores(nir_function_impl *impl, nir_block *block)
{
   /* Hash table mapping decl_reg defs to a num_components-size array of
    * nir_intrinsic_instr*s. Each component contains the pointer to the next
    * store to that component, if one exists in the block while walking
    * backwards that has not yet had an intervening load, or NULL otherwise.
    * This represents the set of stores that, at the current point of iteration,
    * could be trivial.
    */
   struct hash_table *possibly_trivial_stores =
      _mesa_pointer_hash_table_create(NULL);

   /* Following the algorithm directly, we would call trivialize_source() on
    * the following if source here because we are walking instructions
    * backwards so you process the following if before instructions in that
    * case.  However, we know a priori that possibly_trivial_stores is empty
    * at this point so trivialize_source() is a no-op.
    */

   nir_foreach_instr_reverse_safe(instr, block) {
      /* Reaching a def while walking backwards means we have now seen the
       * producer of any value stored by a tracked store_reg; decide its fate.
       */
      nir_foreach_def(instr, clear_def, possibly_trivial_stores);

      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         if (nir_is_load_reg(intr)) {
            /* Read-after-write: there is a load between the def and store. */
            trivialize_read_after_write(intr, possibly_trivial_stores);
         } else if (nir_is_store_reg(intr)) {
            nir_def *value = intr->src[0].ssa;
            nir_def *reg = intr->src[1].ssa;
            nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
            unsigned num_components = nir_intrinsic_num_components(decl);
            nir_component_mask_t write_mask = nir_intrinsic_write_mask(intr);
            bool nontrivial = false;

            /* Write-after-write dependency */
            trivialize_reg_stores(reg, write_mask, possibly_trivial_stores);

            /* We don't consider indirect stores to be trivial */
            nontrivial |= intr->intrinsic == nir_intrinsic_store_reg_indirect;

            /* If there are multiple uses, not trivial */
            nontrivial |= !list_is_singular(&value->uses);

            /* SSA-only instruction types */
            nir_instr *parent = value->parent_instr;
            nontrivial |= (parent->type == nir_instr_type_load_const) ||
                          (parent->type == nir_instr_type_undef);

            /* Must be written in the same block */
            nontrivial |= (parent->block != block);

            /* Don't allow write masking with non-ALU types for compatibility,
             * since other types didn't have write masks in old NIR.
             */
            nontrivial |=
               (write_mask != nir_component_mask(num_components) &&
                parent->type != nir_instr_type_alu);

            /* Need a move for register copies */
            nontrivial |= parent->type == nir_instr_type_intrinsic &&
                          nir_is_load_reg(nir_instr_as_intrinsic(parent));

            if (nontrivial) {
               isolate_store(intr);
            } else {
               /* This store might be trivial. Record it. */
               nir_intrinsic_instr **stores = NULL;

               struct hash_entry *entry =
                  _mesa_hash_table_search(possibly_trivial_stores, reg);

               if (entry) {
                  stores = entry->data;
               } else {
                  /* Allocated out of the hash table's ralloc context, so it
                   * is freed with the table below.
                   */
                  stores = rzalloc_array(possibly_trivial_stores,
                                         nir_intrinsic_instr *,
                                         num_components);

                  _mesa_hash_table_insert(possibly_trivial_stores, reg, stores);
               }

               u_foreach_bit(c, write_mask) {
                  assert(c < num_components);
                  assert(stores[c] == NULL);
                  stores[c] = intr;
               }
            }
         }
      }

      /* Once we have trivialized loads, we are guaranteed that no store_reg
       * exists in the live range of the destination of a load_reg for the
       * same register.  When trivializing stores, we must further ensure that
       * this holds for the entire live range of the data source of the
       * store_reg and not just for the store_reg instruction itself.  Because
       * values are always killed by sources, we can determine live range
       * interference when walking backwards by looking at the sources of each
       * instruction which read from a load_reg and trivializing any store_reg
       * which interfere with that load.
       *
       * We trivialize sources at the end, because we iterate backwards and in
       * program order the sources are read first.
       */
      nir_foreach_src(instr, trivialize_source, possibly_trivial_stores);
   }

   _mesa_hash_table_destroy(possibly_trivial_stores, NULL);
}
497 
498 void
nir_trivialize_registers(nir_shader * s)499 nir_trivialize_registers(nir_shader *s)
500 {
501    nir_foreach_function_impl(impl, s) {
502       /* All decl_reg intrinsics are in the start block. */
503       move_reg_decls(nir_start_block(impl));
504 
505       nir_foreach_block(block, impl) {
506          trivialize_loads(impl, block);
507          trivialize_stores(impl, block);
508       }
509    }
510 }
511