xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/agx_repair_ssa.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Alyssa Rosenzweig
3  * Copyright 2023 Valve Corporation
4  * Copyright 2022 Collabora Ltd.
5  * SPDX-License-Identifier: MIT
6  */
7 
/*
 * Implementation of "Simple and Efficient Construction of Static Single
 * Assignment Form" by Braun et al.
 * https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf
 */
13 
14 #include "util/hash_table.h"
15 #include "util/ralloc.h"
16 #include "util/u_dynarray.h"
17 #include "agx_builder.h"
18 #include "agx_compiler.h"
19 #include "agx_opcodes.h"
20 
struct repair_block {
   /* For a loop header, whether phi operands have been added, i.e. whether
    * the block is in the paper's "sealedBlocks" set. Only consulted for
    * loop headers (see resolve_read); other blocks never need late sealing.
    */
   bool sealed;

   /* Sparse map: variable name -> agx_index.
    *
    * Definition of a variable at the end of the block.
    */
   struct hash_table_u64 *defs;
};
31 
struct repair_ctx {
   /* The shader being repaired */
   agx_context *shader;

   /* Number of variables in the input (snapshot of ctx->alloc at entry).
    * Values with index >= n are temporaries created during repair.
    */
   unsigned n;

   /* Information on blocks indexed in source order */
   struct repair_block *blocks;
};
41 
42 static inline struct repair_block *
repair_block(struct repair_ctx * ctx,agx_block * block)43 repair_block(struct repair_ctx *ctx, agx_block *block)
44 {
45    return &ctx->blocks[block->index];
46 }
47 
48 static void
record_write(struct repair_ctx * ctx,agx_block * block,unsigned node,agx_index val)49 record_write(struct repair_ctx *ctx, agx_block *block, unsigned node,
50              agx_index val)
51 {
52    assert(node < ctx->n);
53    struct hash_table_u64 *defs = repair_block(ctx, block)->defs;
54    _mesa_hash_table_u64_insert(defs, node,
55                                ralloc_memdup(defs, &val, sizeof(val)));
56 }
57 
58 static void add_phi_operands(struct repair_ctx *ctx, agx_block *block,
59                              agx_instr *phi, agx_index node);
60 
/*
 * Resolve the SSA definition reaching a read of variable `node` in `block`
 * (readVariable / readVariableRecursive in the paper). May create phis,
 * recurse into predecessors, and memoize results via record_write.
 */
static agx_index
resolve_read(struct repair_ctx *ctx, agx_block *block, agx_index node)
{
   struct repair_block *rb = repair_block(ctx, block);

   /* Local value numbering: a definition in this block wins outright. */
   assert(node.type == AGX_INDEX_NORMAL);
   agx_index *local = _mesa_hash_table_u64_search(rb->defs, node.value);

   if (local) {
      assert(!agx_is_null(*local));
      return *local;
   }

   /* Global value numbering. readValueRecursive in the paper */
   unsigned nr_preds = agx_num_predecessors(block);
   agx_index val;

   /* A read with no reaching definition in any predecessor would be an
    * undefined use; the start block must already define everything read.
    */
   assert(nr_preds > 0);

   /* Loop headers are not in the "sealedBlock" set in the paper. To
    * handle, we insert an incomplete phi to be filled in after the rest of
    * the loop is processed.
    */
   if (block->loop_header && !rb->sealed) {
      val = agx_temp_like(ctx->shader, node);
      agx_builder b = agx_init_builder(ctx->shader, agx_before_block(block));
      agx_instr *phi = agx_phi_to(&b, val, nr_preds);
      /* Shadow phis are skipped by the fixup loop in agx_repair_ssa. */
      phi->shadow = true;

      /* Stash the variable in for an intrusive incompletePhis map */
      phi->imm = node.value + 1;
   } else if (nr_preds == 1) {
      /* No phi needed */
      agx_block *pred =
         *util_dynarray_element(&block->predecessors, agx_block *, 0);
      val = resolve_read(ctx, pred, node);
   } else {
      /* Insert phi first to break cycles */
      val = agx_temp_like(ctx->shader, node);
      agx_builder b = agx_init_builder(ctx->shader, agx_before_block(block));
      agx_instr *phi = agx_phi_to(&b, val, nr_preds);
      phi->shadow = true;
      /* Record before recursing so a cyclic read finds this phi instead of
       * recursing forever.
       */
      record_write(ctx, block, node.value, val);
      add_phi_operands(ctx, block, phi, node);
   }

   /* Memoize for subsequent reads of the same variable in this block. */
   assert(!agx_is_null(val));
   record_write(ctx, block, node.value, val);
   return val;
}
112 
113 static void
add_phi_operands(struct repair_ctx * ctx,agx_block * block,agx_instr * phi,agx_index node)114 add_phi_operands(struct repair_ctx *ctx, agx_block *block, agx_instr *phi,
115                  agx_index node)
116 {
117    /* Add phi operands */
118    agx_foreach_predecessor(block, pred) {
119       unsigned s = agx_predecessor_index(block, *pred);
120       phi->src[s] = resolve_read(ctx, *pred, node);
121    }
122 }
123 
124 static void
seal_block(struct repair_ctx * ctx,agx_block * block)125 seal_block(struct repair_ctx *ctx, agx_block *block)
126 {
127    agx_foreach_phi_in_block(block, phi) {
128       /* We use phi->imm as a sideband to pass the variable name. */
129       if (phi->imm) {
130          agx_index var = agx_get_vec_index(phi->imm - 1, phi->dest[0].size,
131                                            agx_channels(phi->dest[0]));
132          var.memory = phi->dest[0].memory;
133          add_phi_operands(ctx, block, phi, var);
134          phi->imm = 0;
135       }
136    }
137 
138    repair_block(ctx, block)->sealed = true;
139 }
140 
141 static void
seal_loop_headers(struct repair_ctx * ctx,struct agx_block * exit)142 seal_loop_headers(struct repair_ctx *ctx, struct agx_block *exit)
143 {
144    agx_foreach_successor(exit, succ) {
145       /* Only loop headers need to be sealed late */
146       if (!succ->loop_header)
147          continue;
148 
149       /* Check if all predecessors have been processed */
150       bool any_unprocessed = false;
151 
152       agx_foreach_predecessor(succ, P) {
153          if ((*P)->index > exit->index) {
154             any_unprocessed = true;
155             break;
156          }
157       }
158 
159       /* Seal once all predecessors are processed */
160       if (!any_unprocessed)
161          seal_block(ctx, succ);
162    }
163 }
164 
/*
 * Remove trivial phis: a phi whose sources are all either one common value
 * or a self-reference is replaced by that value. Removing one phi can make
 * another trivial, so iterate to a fixed point.
 */
static void
agx_opt_trivial_phi(agx_context *ctx)
{
   /* remap[v] = replacement for SSA value v, or null if untouched.
    * NOTE(review): calloc result is unchecked — crashes on OOM; confirm
    * this matches the driver's allocation-failure policy.
    */
   agx_index *remap = calloc(ctx->alloc, sizeof(*remap));
   for (;;) {
      bool progress = false;
      memset(remap, 0, ctx->alloc * sizeof(*remap));

      agx_foreach_block(ctx, block) {
         agx_foreach_phi_in_block_safe(block, phi) {
            agx_index same = agx_null();
            bool all_same = true;

            agx_foreach_src(phi, s) {
               /* TODO: Handle cycles faster */
               if (!agx_is_null(remap[phi->src[s].value])) {
                  all_same = false;
                  break;
               }

               /* Same value or self-reference */
               if (agx_is_equiv(phi->src[s], same) ||
                   agx_is_equiv(phi->src[s], phi->dest[0]))
                  continue;

               if (!agx_is_null(same)) {
                  all_same = false;
                  break;
               }

               same = phi->src[s];
            }

            if (all_same) {
               /* Note: if every source was a self-reference, `same` is
                * still null and the phi's value is recorded as null.
                */
               remap[phi->dest[0].value] = same;
               agx_remove_instruction(phi);
               progress = true;
            }
         }
      }

      if (!progress)
         break;

      /* Rewrite all uses of removed phis before the next iteration. */
      agx_foreach_instr_global(ctx, I) {
         agx_foreach_ssa_src(I, s) {
            if (!agx_is_null(remap[I->src[s].value])) {
               agx_replace_src(I, s, remap[I->src[s].value]);
            }
         }
      }
   }

   free(remap);
}
220 
/*
 * Repair broken SSA (multiple definitions of a variable) by rebuilding SSA
 * form with the Braun et al. algorithm: walk blocks in source order,
 * renaming definitions and resolving reads, then fix up phi sources, then
 * clean up trivial phis and reindex.
 */
void
agx_repair_ssa(agx_context *ctx)
{
   /* Per-block repair state; the ralloc context also owns the def tables. */
   struct repair_block *blocks =
      rzalloc_array(NULL, struct repair_block, ctx->num_blocks);

   agx_foreach_block(ctx, block) {
      struct repair_block *rb = &blocks[block->index];
      rb->defs = _mesa_hash_table_u64_create(blocks);
   }

   /* Snapshot the variable count: anything allocated past this point is a
    * temporary created by the repair itself.
    */
   unsigned n = ctx->alloc;

   agx_foreach_block(ctx, block) {
      struct repair_ctx rctx = {
         .shader = ctx,
         .n = n,
         .blocks = blocks,
      };

      agx_foreach_instr_in_block(block, I) {
         /* Repair SSA for the instruction. Phi sources are deferred to the
          * second pass since they read in predecessors, not here.
          */
         if (I->op != AGX_OPCODE_PHI) {
            agx_foreach_ssa_src(I, s) {
               assert(I->src[s].value < n);
               agx_replace_src(I, s, resolve_read(&rctx, block, I->src[s]));
            }
         }

         agx_foreach_ssa_dest(I, d) {
            unsigned handle = I->dest[d].value;

            /* Skip phis that we just created when processing loops */
            if (handle >= n) {
               assert(I->op == AGX_OPCODE_PHI);
               continue;
            }

            /* Give every definition a fresh name, making it single. */
            I->dest[d] =
               agx_replace_index(I->dest[d], agx_temp_like(ctx, I->dest[d]));

            record_write(&rctx, block, handle, I->dest[d]);
         }
      }

      seal_loop_headers(&rctx, block);
   }

   /* Second pass: resolve the deferred phi sources. */
   agx_foreach_block(ctx, block) {
      agx_foreach_phi_in_block(block, phi) {
         /* The kill bit is invalid (and meaningless) for phis. Liveness
          * analysis does not produce it. However, we're ingesting broken SSA
          * where we can have random kill bits set on phis. Strip them as part
          * of the SSA repair.
          *
          * The register allocator depends on this for correctness.
          */
         phi->dest[0].kill = false;

         agx_foreach_src(phi, s) {
            phi->src[s].kill = false;
         }

         /* Skip the phis that we just created */
         if (phi->shadow) {
            phi->shadow = false;
            continue;
         }

         agx_foreach_ssa_src(phi, s) {
            /* Phis (uniquely) read their sources in their corresponding
             * predecessors, so chain through for that.
             */
            agx_block *read_block =
               *util_dynarray_element(&block->predecessors, agx_block *, s);

            assert(phi->src[s].value < n);

            struct repair_ctx rctx = {
               .shader = ctx,
               .n = n,
               .blocks = blocks,
            };

            agx_replace_src(phi, s,
                            resolve_read(&rctx, read_block, phi->src[s]));
         }
      }
   }

   /* Frees the def tables too (they are rallocated off `blocks`). */
   ralloc_free(blocks);

   agx_opt_trivial_phi(ctx);
   agx_reindex_ssa(ctx);
}
316