/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * Copyright 2022 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

/*
 * Implementation of "Simple and Efficient Construction of Static Single
 * Assignment Form", also by Braun et al.
 * https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf
 */
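
/*
 * The input is "broken" SSA: a variable may be assigned more than once, and a
 * use need not be dominated by a definition. For example, a variable assigned
 * on both sides of an if and read after the join needs a phi at the join.
 * Treating each name in the broken program as a variable in the sense of the
 * paper, we rebuild valid SSA, inserting phis on demand.
 */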

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/u_dynarray.h"
#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_opcodes.h"

struct repair_block {
   /* For a loop header, whether phi operands have been added */
   bool sealed;

   /* Sparse map: variable name -> agx_index.
    *
    * Definition of a variable at the end of the block.
    */
   struct hash_table_u64 *defs;
};

struct repair_ctx {
   agx_context *shader;

   /* Number of variables */
   unsigned n;

   /* Information on blocks indexed in source order */
   struct repair_block *blocks;
};

static inline struct repair_block *
repair_block(struct repair_ctx *ctx, agx_block *block)
{
   return &ctx->blocks[block->index];
}
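
/*
 * writeVariable in the paper: record that variable "node" is defined by "val"
 * at the end of "block" (so far).
 */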
static void
record_write(struct repair_ctx *ctx, agx_block *block, unsigned node,
             agx_index val)
{
   assert(node < ctx->n);
   struct hash_table_u64 *defs = repair_block(ctx, block)->defs;
   _mesa_hash_table_u64_insert(defs, node,
                               ralloc_memdup(defs, &val, sizeof(val)));
}

static void add_phi_operands(struct repair_ctx *ctx, agx_block *block,
                             agx_instr *phi, agx_index node);
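
/*
 * readVariable in the paper, with readVariableRecursive inlined: return the
 * definition of variable "node" reaching the end of "block", inserting phis
 * (possibly incomplete, at unsealed loop headers) as needed.
 */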
static agx_index
resolve_read(struct repair_ctx *ctx, agx_block *block, agx_index node)
{
   struct repair_block *rb = repair_block(ctx, block);

   /* Local value numbering */
   assert(node.type == AGX_INDEX_NORMAL);
   agx_index *local = _mesa_hash_table_u64_search(rb->defs, node.value);

   if (local) {
      assert(!agx_is_null(*local));
      return *local;
   }

   /* Global value numbering. readVariableRecursive in the paper */
   unsigned nr_preds = agx_num_predecessors(block);
   agx_index val;

   assert(nr_preds > 0);

   /* Loop headers are not in the "sealedBlocks" set in the paper. To handle
    * them, we insert an incomplete phi to be filled in after the rest of the
    * loop is processed.
    */
   if (block->loop_header && !rb->sealed) {
      val = agx_temp_like(ctx->shader, node);
      agx_builder b = agx_init_builder(ctx->shader, agx_before_block(block));
      agx_instr *phi = agx_phi_to(&b, val, nr_preds);
      phi->shadow = true;

      /* Stash the variable name on the phi itself, standing in for the
       * paper's incompletePhis map. Bias by 1 so that imm == 0 means no
       * deferred variable.
       */
      phi->imm = node.value + 1;
   } else if (nr_preds == 1) {
      /* No phi needed */
      agx_block *pred =
         *util_dynarray_element(&block->predecessors, agx_block *, 0);
      val = resolve_read(ctx, pred, node);
   } else {
      /* Insert and record the phi before reading its operands, to break
       * cycles when an operand reads the phi itself.
       */
      val = agx_temp_like(ctx->shader, node);
      agx_builder b = agx_init_builder(ctx->shader, agx_before_block(block));
      agx_instr *phi = agx_phi_to(&b, val, nr_preds);
      phi->shadow = true;
      record_write(ctx, block, node.value, val);
      add_phi_operands(ctx, block, phi, node);
   }

   assert(!agx_is_null(val));
   record_write(ctx, block, node.value, val);
   return val;
}
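
/*
 * addPhiOperands in the paper: complete a phi by reading the variable at the
 * end of each predecessor.
 */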
static void
add_phi_operands(struct repair_ctx *ctx, agx_block *block, agx_instr *phi,
                 agx_index node)
{
   /* Each phi source corresponds to a predecessor, so resolve the variable
    * at the end of that predecessor.
    */
   agx_foreach_predecessor(block, pred) {
      unsigned s = agx_predecessor_index(block, *pred);
      phi->src[s] = resolve_read(ctx, *pred, node);
   }
}
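
/*
 * sealBlock in the paper: every predecessor of this block has now been
 * processed, so the operands of its deferred (incomplete) phis can be filled
 * in.
 */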
static void
seal_block(struct repair_ctx *ctx, agx_block *block)
{
   agx_foreach_phi_in_block(block, phi) {
      /* We use phi->imm as a sideband to pass the variable name. */
      if (phi->imm) {
         agx_index var = agx_get_vec_index(phi->imm - 1, phi->dest[0].size,
                                           agx_channels(phi->dest[0]));
         var.memory = phi->dest[0].memory;
         add_phi_operands(ctx, block, phi, var);
         phi->imm = 0;
      }
   }

   repair_block(ctx, block)->sealed = true;
}
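
/*
 * Blocks are processed in source order, so ordinary blocks are sealed before
 * we reach them. Loop headers are the exception: seal them only once every
 * predecessor, including the back edge, has been processed.
 */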
static void
seal_loop_headers(struct repair_ctx *ctx, struct agx_block *exit)
{
   agx_foreach_successor(exit, succ) {
      /* Only loop headers need to be sealed late */
      if (!succ->loop_header)
         continue;

      /* Check if all predecessors have been processed */
      bool any_unprocessed = false;

      agx_foreach_predecessor(succ, P) {
         if ((*P)->index > exit->index) {
            any_unprocessed = true;
            break;
         }
      }

      /* Seal once all predecessors are processed */
      if (!any_unprocessed)
         seal_block(ctx, succ);
   }
}
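
/*
 * tryRemoveTrivialPhi in the paper, run as a global fixed point rather than
 * on the fly. A phi is trivial if every source is either one common value or
 * a self-reference. For example, in
 *
 *    x = phi(y, x)
 *
 * x must equal y on every path, so uses of x are rewritten to y and the phi
 * is removed.
 */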
static void
agx_opt_trivial_phi(agx_context *ctx)
{
   agx_index *remap = calloc(ctx->alloc, sizeof(*remap));
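
   /* Removing one trivial phi can make another phi trivial in turn, so
    * iterate to a fixed point, rewriting users in bulk after each round.
    */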
   for (;;) {
      bool progress = false;
      memset(remap, 0, ctx->alloc * sizeof(*remap));

      agx_foreach_block(ctx, block) {
         agx_foreach_phi_in_block_safe(block, phi) {
            agx_index same = agx_null();
            bool all_same = true;

            agx_foreach_src(phi, s) {
               /* TODO: Handle cycles faster */
               if (!agx_is_null(remap[phi->src[s].value])) {
                  all_same = false;
                  break;
               }

               /* Same value or self-reference */
               if (agx_is_equiv(phi->src[s], same) ||
                   agx_is_equiv(phi->src[s], phi->dest[0]))
                  continue;

               if (!agx_is_null(same)) {
                  all_same = false;
                  break;
               }

               same = phi->src[s];
            }

            if (all_same) {
               remap[phi->dest[0].value] = same;
               agx_remove_instruction(phi);
               progress = true;
            }
         }
      }

      if (!progress)
         break;

      agx_foreach_instr_global(ctx, I) {
         agx_foreach_ssa_src(I, s) {
            if (!agx_is_null(remap[I->src[s].value])) {
               agx_replace_src(I, s, remap[I->src[s].value]);
            }
         }
      }
   }

   free(remap);
}
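
/*
 * Rewrite a program in broken SSA into valid SSA: walk the blocks in source
 * order, rename each destination to a fresh variable, and rewrite each source
 * to the definition reaching it. Phi sources are resolved in a second pass,
 * since they read at the end of their predecessors instead.
 */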
void
agx_repair_ssa(agx_context *ctx)
{
   struct repair_block *blocks =
      rzalloc_array(NULL, struct repair_block, ctx->num_blocks);

   agx_foreach_block(ctx, block) {
      struct repair_block *rb = &blocks[block->index];
      rb->defs = _mesa_hash_table_u64_create(blocks);
   }
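
   /* Variables in the pre-repair program are numbered below ctx->alloc.
    * Temporaries created during repair get larger names, letting us
    * recognize the phis we insert ourselves.
    */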
   unsigned n = ctx->alloc;

   agx_foreach_block(ctx, block) {
      struct repair_ctx rctx = {
         .shader = ctx,
         .n = n,
         .blocks = blocks,
      };

      agx_foreach_instr_in_block(block, I) {
         /* Rewrite sources before renaming destinations, so each read sees
          * the definition reaching this instruction. Phi sources read at the
          * end of predecessors instead; they are handled in the second pass
          * below.
          */
         if (I->op != AGX_OPCODE_PHI) {
            agx_foreach_ssa_src(I, s) {
               assert(I->src[s].value < n);
               agx_replace_src(I, s, resolve_read(&rctx, block, I->src[s]));
            }
         }

         agx_foreach_ssa_dest(I, d) {
            unsigned handle = I->dest[d].value;

            /* Skip phis that we just created when processing loops */
            if (handle >= n) {
               assert(I->op == AGX_OPCODE_PHI);
               continue;
            }

            I->dest[d] =
               agx_replace_index(I->dest[d], agx_temp_like(ctx, I->dest[d]));

            record_write(&rctx, block, handle, I->dest[d]);
         }
      }

      seal_loop_headers(&rctx, block);
   }
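
   /* Second pass: with every block's final defs recorded, resolve phi
    * sources, which read at the end of the corresponding predecessor rather
    * than at the phi itself.
    */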
   agx_foreach_block(ctx, block) {
      agx_foreach_phi_in_block(block, phi) {
         /* The kill bit is invalid (and meaningless) for phis. Liveness
          * analysis does not produce it. However, we're ingesting broken SSA
          * where we can have random kill bits set on phis. Strip them as
          * part of the SSA repair.
          *
          * The register allocator depends on this for correctness.
          */
         phi->dest[0].kill = false;

         agx_foreach_src(phi, s) {
            phi->src[s].kill = false;
         }

         /* Skip the phis that we just created */
         if (phi->shadow) {
            phi->shadow = false;
            continue;
         }

         agx_foreach_ssa_src(phi, s) {
            /* Phis (uniquely) read their sources in their corresponding
             * predecessors, so chain through for that.
             */
            agx_block *read_block =
               *util_dynarray_element(&block->predecessors, agx_block *, s);

            assert(phi->src[s].value < n);

            struct repair_ctx rctx = {
               .shader = ctx,
               .n = n,
               .blocks = blocks,
            };

            agx_replace_src(phi, s,
                            resolve_read(&rctx, read_block, phi->src[s]));
         }
      }
   }

   ralloc_free(blocks);

   agx_opt_trivial_phi(ctx);
   agx_reindex_ssa(ctx);
}