1 /*
2 * Copyright 2023 Valve Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "util/bitscan.h"
7 #include "util/hash_table.h"
8 #include "util/list.h"
9 #include "util/ralloc.h"
10 #include "nir.h"
11 #include "nir_builder.h"
12 #include "nir_builder_opcodes.h"
13 #include "nir_intrinsics_indices.h"
14
15 /*
16 * If we have NIR like
17 *
18 * x = load_reg reg
19 * use(x)
20 *
21 * we can translate to a single instruction use(reg) in one step by inspecting
22 * the parent instruction of x, which is convenient for instruction selection
23 * that historically used registers.
24 *
25 * However, if we have an intervening store
26 *
27 * x = load_reg reg
28 * store_reg reg, y
29 * use(x)
30 *
31 * we are no longer able to translate to use(reg), since reg has been
32 * overwritten. We could detect the write-after-read hazard at instruction
33 * selection time, but that requires an O(n) walk of instructions for each
34 * register source read, leading to quadratic compile time. Instead, we ensure
35 * this hazard does not happen and then use the simple O(1) translation.
36 *
37 * We say that a load_register is "trivial" if:
38 *
39 * 1. every use is in the same block as the load_reg
40 *
41 * 2. there is no intervening store_register (write-after-read) between the
42 * load and the use.
43 *
 * Similarly, a store_register is trivial if:
45 *
46 * 1. the value stored has exactly one use (the store)
47 *
48 * 2. the value is written in the same block as the store
49 *
 * 3. the live range of the components of the value being stored does not
 * overlap the live range of the destination of any load_reg or the data
 * source components of any store_reg operating on that same register. The
 * live ranges of the data portions of two store_reg intrinsics may overlap
 * if they have non-intersecting write masks.
 *
 * 4. the producer is not a load_const or ssa_undef (as these historically
 * could not write to registers so backends are expecting SSA here), or a
 * load_reg (since backends need a move to copy between registers)
 *
 * 5. if indirect, the indirect index is live at the producer.
61 *
62 * This pass inserts copies to ensure that all load_reg/store_reg are trivial.
63 */
64
65 /*
66 * In order to allow for greater freedom elsewhere in the pass, move all
67 * reg_decl intrinsics to the top of their block. This ensures in particular
68 * that decl_reg intrinsics occur before the producer of the SSA value
69 * consumed by a store_reg whenever they're all in the same block.
70 */
71 static void
move_reg_decls(nir_block * block)72 move_reg_decls(nir_block *block)
73 {
74 nir_cursor cursor = nir_before_block(block);
75
76 nir_foreach_instr_safe(instr, block) {
77 if (instr->type != nir_instr_type_intrinsic)
78 continue;
79
80 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
81 if (intr->intrinsic != nir_intrinsic_decl_reg)
82 continue;
83
84 nir_instr_move(cursor, instr);
85 cursor = nir_after_instr(instr);
86 }
87 }
88
89 /*
90 * Any load can be trivialized by copying immediately after the load and then
91 * rewriting uses of the load to read from the copy. That has no functional
92 * change, but it means that for every use of the load (the copy), there is no
93 * intervening instruction and in particular no intervening store on any control
94 * flow path. Therefore the load is trivial.
95 */
96 static void
trivialize_load(nir_intrinsic_instr * load)97 trivialize_load(nir_intrinsic_instr *load)
98 {
99 assert(nir_is_load_reg(load));
100
101 nir_builder b = nir_builder_at(nir_after_instr(&load->instr));
102 nir_def *copy = nir_mov(&b, &load->def);
103 copy->divergent = load->def.divergent;
104 nir_def_rewrite_uses_after(&load->def, copy, copy->parent_instr);
105
106 assert(list_is_singular(&load->def.uses));
107 }
108
109 struct trivialize_src_state {
110 nir_block *block;
111 BITSET_WORD *trivial_loads;
112 };
113
114 static bool
trivialize_src(nir_src * src,void * state_)115 trivialize_src(nir_src *src, void *state_)
116 {
117 struct trivialize_src_state *state = state_;
118
119 nir_instr *parent = src->ssa->parent_instr;
120 if (parent->type != nir_instr_type_intrinsic)
121 return true;
122
123 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
124 if (!nir_is_load_reg(intr))
125 return true;
126
127 if (intr->instr.block != state->block ||
128 !BITSET_TEST(state->trivial_loads, intr->def.index))
129 trivialize_load(intr);
130
131 return true;
132 }
133
134 static void
trivialize_loads(nir_function_impl * impl,nir_block * block)135 trivialize_loads(nir_function_impl *impl, nir_block *block)
136 {
137 struct trivialize_src_state state = {
138 .block = block,
139 .trivial_loads = calloc(BITSET_WORDS(impl->ssa_alloc),
140 sizeof(BITSET_WORD)),
141 };
142
143 nir_foreach_instr_safe(instr, block) {
144 /* Our cross-block serialization can't handle phis */
145 assert(instr->type != nir_instr_type_phi);
146
147 nir_foreach_src(instr, trivialize_src, &state);
148
149 /* We maintain a set of register loads which can be accessed trivially.
150 * When we hit a load, it is added to the trivial set. When the register
151 * is stored, all loads from the register become nontrivial. That means
152 * the window between the load and the store is where the register can be
153 * accessed legally.
154 *
155 * Note that we must track loads and not registers to correctly handle
156 * cases like:
157 *
158 * %1 = @load_reg %0
159 * %2 = @load_reg %0
160 * @store_reg data, %0
161 * use %1
162 *
163 * This is pretty obscure but it isn't a big deal to handle.
164 */
165 if (instr->type == nir_instr_type_intrinsic) {
166 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
167
168 /* We don't consider indirect loads to ever be trivial */
169 if (intr->intrinsic == nir_intrinsic_load_reg_indirect) {
170 trivialize_load(intr);
171 } else if (intr->intrinsic == nir_intrinsic_load_reg) {
172 BITSET_SET(state.trivial_loads, intr->def.index);
173 } else if (nir_is_store_reg(intr)) {
174 nir_intrinsic_instr *reg = nir_reg_get_decl(intr->src[1].ssa);
175
176 nir_foreach_reg_load(load, reg) {
177 nir_instr *parent = nir_src_parent_instr(load);
178 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
179
180 BITSET_CLEAR(state.trivial_loads, intr->def.index);
181 }
182 }
183 }
184 }
185
186 /* Also check the condition of the next if */
187 nir_if *nif = nir_block_get_following_if(block);
188 if (nif)
189 trivialize_src(&nif->condition, &state);
190
191 free(state.trivial_loads);
192 }
193
194 /*
195 * Any store can be made trivial by inserting a copy of the value immediately
196 * before the store and reading from the copy instead. Proof:
197 *
198 * 1. The new value stored (the copy result) is used exactly once.
199 *
200 * 2. No intervening instructions between the copy and the store.
201 *
202 * 3. The copy is ALU, not load_const or ssa_undef.
203 *
204 * 4. The indirect index must be live at the store, which means it is also
205 * live at the copy inserted immediately before the store (same live-in set),
206 * so it is live at the new producer (the copy).
207 */
208 static void
isolate_store(nir_intrinsic_instr * store)209 isolate_store(nir_intrinsic_instr *store)
210 {
211 assert(nir_is_store_reg(store));
212
213 nir_builder b = nir_builder_at(nir_before_instr(&store->instr));
214 nir_def *copy = nir_mov(&b, store->src[0].ssa);
215 copy->divergent = store->src[0].ssa->divergent;
216 nir_src_rewrite(&store->src[0], copy);
217 }
218
219 static void
clear_store(nir_intrinsic_instr * store,unsigned num_reg_components,nir_intrinsic_instr ** reg_stores)220 clear_store(nir_intrinsic_instr *store,
221 unsigned num_reg_components,
222 nir_intrinsic_instr **reg_stores)
223 {
224 nir_component_mask_t mask = nir_intrinsic_write_mask(store);
225 u_foreach_bit(c, mask) {
226 assert(c < num_reg_components);
227 assert(reg_stores[c] == store);
228 reg_stores[c] = NULL;
229 }
230 }
231
232 static void
clear_reg_stores(nir_def * reg,struct hash_table * possibly_trivial_stores)233 clear_reg_stores(nir_def *reg,
234 struct hash_table *possibly_trivial_stores)
235 {
236 /* At any given point in store trivialize pass, every store in the current
237 * block is either trivial or in the possibly_trivial_stores map.
238 */
239 struct hash_entry *entry =
240 _mesa_hash_table_search(possibly_trivial_stores, reg);
241 if (entry == NULL)
242 return;
243
244 nir_intrinsic_instr **stores = entry->data;
245 nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
246 unsigned num_components = nir_intrinsic_num_components(decl);
247
248 for (unsigned c = 0; c < num_components; c++) {
249 if (stores[c] == NULL)
250 continue;
251
252 clear_store(stores[c], num_components, stores);
253 }
254 }
255
256 static void
trivialize_store(nir_intrinsic_instr * store,struct hash_table * possibly_trivial_stores)257 trivialize_store(nir_intrinsic_instr *store,
258 struct hash_table *possibly_trivial_stores)
259 {
260 nir_def *reg = store->src[1].ssa;
261
262 /* At any given point in store trivialize pass, every store in the current
263 * block is either trivial or in the possibly_trivial_stores map.
264 */
265 struct hash_entry *entry =
266 _mesa_hash_table_search(possibly_trivial_stores, reg);
267 if (entry == NULL)
268 return;
269
270 nir_intrinsic_instr **stores = entry->data;
271 nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
272 unsigned num_components = nir_intrinsic_num_components(decl);
273
274 nir_component_mask_t found = 0;
275 for (unsigned c = 0; c < num_components; c++) {
276 if (stores[c] == store)
277 found |= BITFIELD_BIT(c);
278 }
279 if (!found)
280 return;
281
282 /* A store can't be only partially trivial */
283 assert(found == nir_intrinsic_write_mask(store));
284
285 isolate_store(store);
286 clear_store(store, num_components, stores);
287 }
288
289 static void
trivialize_reg_stores(nir_def * reg,nir_component_mask_t mask,struct hash_table * possibly_trivial_stores)290 trivialize_reg_stores(nir_def *reg, nir_component_mask_t mask,
291 struct hash_table *possibly_trivial_stores)
292 {
293 /* At any given point in store trivialize pass, every store in the current
294 * block is either trivial or in the possibly_trivial_stores map.
295 */
296 struct hash_entry *entry =
297 _mesa_hash_table_search(possibly_trivial_stores, reg);
298 if (entry == NULL)
299 return;
300
301 nir_intrinsic_instr **stores = entry->data;
302 nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
303 unsigned num_components = nir_intrinsic_num_components(decl);
304
305 u_foreach_bit(c, mask) {
306 assert(c < num_components);
307 if (stores[c] == NULL)
308 continue;
309
310 isolate_store(stores[c]);
311 clear_store(stores[c], num_components, stores);
312 }
313 }
314
315 /*
316 * Trivialize for read-after-write hazards, given a load that is between the def
317 * and store.
318 */
319 static void
trivialize_read_after_write(nir_intrinsic_instr * load,struct hash_table * possibly_trivial_stores)320 trivialize_read_after_write(nir_intrinsic_instr *load,
321 struct hash_table *possibly_trivial_stores)
322 {
323 assert(nir_is_load_reg(load));
324
325 unsigned nr = load->def.num_components;
326 trivialize_reg_stores(load->src[0].ssa, nir_component_mask(nr),
327 possibly_trivial_stores);
328 }
329
330 static bool
clear_def(nir_def * def,void * state)331 clear_def(nir_def *def, void *state)
332 {
333 struct hash_table *possibly_trivial_stores = state;
334
335 nir_foreach_use(src, def) {
336 if (nir_src_is_if(src))
337 continue;
338
339 nir_instr *parent = nir_src_parent_instr(src);
340 if (parent->type != nir_instr_type_intrinsic)
341 continue;
342
343 nir_intrinsic_instr *store = nir_instr_as_intrinsic(parent);
344 if (!nir_is_store_reg(store))
345 continue;
346
347 /* Anything global has already been trivialized and can be ignored */
348 if (parent->block != def->parent_instr->block)
349 continue;
350
351 if (def == store->src[0].ssa) {
352 /* We encountered a value which is written by a store_reg so, if this
353 * store is still in possibly_trivial_stores, it is trivial and we
354 * can remove it from the set.
355 */
356 assert(list_is_singular(&def->uses));
357 clear_reg_stores(store->src[1].ssa, possibly_trivial_stores);
358 } else {
359 /* We encoutered either the ineirect index or the decl_reg (unlikely)
360 * before the value while iterating backwards. Trivialize the store
361 * now to maintain dominance.
362 */
363 trivialize_store(store, possibly_trivial_stores);
364 }
365 }
366
367 return false;
368 }
369
370 /*
371 * If a load_reg will be folded into this instruction, we need to handle
372 * read-after-write hazards, the same as if we hit a load_reg directly.
373 */
374 static bool
trivialize_source(nir_src * src,void * state)375 trivialize_source(nir_src *src, void *state)
376 {
377 struct hash_table *possibly_trivial_stores = state;
378
379 nir_intrinsic_instr *load_reg = nir_load_reg_for_def(src->ssa);
380 if (load_reg)
381 trivialize_read_after_write(load_reg, possibly_trivial_stores);
382
383 return true;
384 }
385
386 static void
trivialize_stores(nir_function_impl * impl,nir_block * block)387 trivialize_stores(nir_function_impl *impl, nir_block *block)
388 {
389 /* Hash table mapping decl_reg defs to a num_components-size array of
390 * nir_intrinsic_instr*s. Each component contains the pointer to the next
391 * store to that component, if one exists in the block while walking
392 * backwards that has not yet had an intervening load, or NULL otherwise.
393 * This represents the set of stores that, at the current point of iteration,
394 * could be trivial.
395 */
396 struct hash_table *possibly_trivial_stores =
397 _mesa_pointer_hash_table_create(NULL);
398
399 /* Following the algorithm directly, we would call trivialize_source() on
400 * the following if source here because we are walking instructions
401 * backwards so you process the following if before instructions in that
402 * case. However, we know a priori that possibly_trivial_stores is empty
403 * at this point so trivialize_source() is a no-op.
404 */
405
406 nir_foreach_instr_reverse_safe(instr, block) {
407 nir_foreach_def(instr, clear_def, possibly_trivial_stores);
408
409 if (instr->type == nir_instr_type_intrinsic) {
410 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
411
412 if (nir_is_load_reg(intr)) {
413 /* Read-after-write: there is a load between the def and store. */
414 trivialize_read_after_write(intr, possibly_trivial_stores);
415 } else if (nir_is_store_reg(intr)) {
416 nir_def *value = intr->src[0].ssa;
417 nir_def *reg = intr->src[1].ssa;
418 nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
419 unsigned num_components = nir_intrinsic_num_components(decl);
420 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intr);
421 bool nontrivial = false;
422
423 /* Write-after-write dependency */
424 trivialize_reg_stores(reg, write_mask, possibly_trivial_stores);
425
426 /* We don't consider indirect stores to be trivial */
427 nontrivial |= intr->intrinsic == nir_intrinsic_store_reg_indirect;
428
429 /* If there are multiple uses, not trivial */
430 nontrivial |= !list_is_singular(&value->uses);
431
432 /* SSA-only instruction types */
433 nir_instr *parent = value->parent_instr;
434 nontrivial |= (parent->type == nir_instr_type_load_const) ||
435 (parent->type == nir_instr_type_undef);
436
437 /* Must be written in the same block */
438 nontrivial |= (parent->block != block);
439
440 /* Don't allow write masking with non-ALU types for compatibility,
441 * since other types didn't have write masks in old NIR.
442 */
443 nontrivial |=
444 (write_mask != nir_component_mask(num_components) &&
445 parent->type != nir_instr_type_alu);
446
447 /* Need a move for register copies */
448 nontrivial |= parent->type == nir_instr_type_intrinsic &&
449 nir_is_load_reg(nir_instr_as_intrinsic(parent));
450
451 if (nontrivial) {
452 isolate_store(intr);
453 } else {
454 /* This store might be trivial. Record it. */
455 nir_intrinsic_instr **stores = NULL;
456
457 struct hash_entry *entry =
458 _mesa_hash_table_search(possibly_trivial_stores, reg);
459
460 if (entry) {
461 stores = entry->data;
462 } else {
463 stores = rzalloc_array(possibly_trivial_stores,
464 nir_intrinsic_instr *,
465 num_components);
466
467 _mesa_hash_table_insert(possibly_trivial_stores, reg, stores);
468 }
469
470 u_foreach_bit(c, write_mask) {
471 assert(c < num_components);
472 assert(stores[c] == NULL);
473 stores[c] = intr;
474 }
475 }
476 }
477 }
478
479 /* Once we have trivialized loads, we are guaranteed that no store_reg
480 * exists in the live range of the destination of a load_reg for the
481 * same register. When trivializing stores, we must further ensure that
482 * this holds for the entire live range of the data source of the
483 * store_reg and not just for the store_reg instruction itself. Because
484 * values are always killed by sources, we can determine live range
485 * interference when walking backwards by looking at the sources of each
486 * instruction which read from a load_reg and trivializing any store_reg
487 * which interfere with that load.
488 *
489 * We trivialize sources at the end, because we iterate backwards and in
490 * program order the sources are read first.
491 */
492 nir_foreach_src(instr, trivialize_source, possibly_trivial_stores);
493 }
494
495 _mesa_hash_table_destroy(possibly_trivial_stores, NULL);
496 }
497
498 void
nir_trivialize_registers(nir_shader * s)499 nir_trivialize_registers(nir_shader *s)
500 {
501 nir_foreach_function_impl(impl, s) {
502 /* All decl_reg intrinsics are in the start block. */
503 move_reg_decls(nir_start_block(impl));
504
505 nir_foreach_block(block, impl) {
506 trivialize_loads(impl, block);
507 trivialize_stores(impl, block);
508 }
509 }
510 }
511