xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/agx_insert_waits.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2022 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "agx_builder.h"
7 #include "agx_compiler.h"
8 #include "agx_debug.h"
9 
/* Upper bound on the number of pending messages tracked per scoreboard slot;
 * a slot that has reached this count is waited on before another asynchronous
 * instruction is recorded against it.
 */
#define AGX_MAX_PENDING (8)
11 
12 /*
13  * Returns whether an instruction is asynchronous and needs a scoreboard slot
14  */
15 static bool
instr_is_async(agx_instr * I)16 instr_is_async(agx_instr *I)
17 {
18    return agx_opcodes_info[I->op].immediates & AGX_IMMEDIATE_SCOREBOARD;
19 }
20 
/* Per-scoreboard-slot bookkeeping used while inserting waits within a block */
struct slot {
   /* Set of registers this slot is currently writing, i.e. the destinations
    * of asynchronous instructions recorded on this slot that have not been
    * waited on yet. Cleared whenever a wait on the slot is inserted.
    */
   BITSET_DECLARE(writes, AGX_NUM_REGS);

   /* Number of pending messages on this slot. Must not exceed
    * AGX_MAX_PENDING for correct results. Reset to zero whenever a wait on
    * the slot is inserted.
    */
   uint8_t nr_pending;
};
30 
31 /*
32  * Insert waits within a block to stall after every async instruction. Useful
33  * for debugging.
34  */
35 static void
agx_insert_waits_trivial(agx_context * ctx,agx_block * block)36 agx_insert_waits_trivial(agx_context *ctx, agx_block *block)
37 {
38    agx_foreach_instr_in_block_safe(block, I) {
39       if (instr_is_async(I)) {
40          agx_builder b = agx_init_builder(ctx, agx_after_instr(I));
41          agx_wait(&b, I->scoreboard);
42       }
43    }
44 }
45 
46 /*
47  * Insert waits within a block, assuming scoreboard slots have already been
48  * assigned. This waits for everything at the end of the block, rather than
49  * doing something more intelligent/global. This should be optimized.
50  *
51  * XXX: Do any instructions read their sources asynchronously?
52  */
53 static void
agx_insert_waits_local(agx_context * ctx,agx_block * block)54 agx_insert_waits_local(agx_context *ctx, agx_block *block)
55 {
56    struct slot slots[2] = {0};
57 
58    agx_foreach_instr_in_block_safe(block, I) {
59       uint8_t wait_mask = 0;
60 
61       /* Check for read-after-write */
62       agx_foreach_src(I, s) {
63          if (I->src[s].type != AGX_INDEX_REGISTER)
64             continue;
65 
66          unsigned nr_read = agx_index_size_16(I->src[s]);
67          for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
68             if (BITSET_TEST_RANGE(slots[slot].writes, I->src[s].value,
69                                   I->src[s].value + nr_read - 1))
70                wait_mask |= BITSET_BIT(slot);
71          }
72       }
73 
74       /* Check for write-after-write */
75       agx_foreach_dest(I, d) {
76          if (I->dest[d].type != AGX_INDEX_REGISTER)
77             continue;
78 
79          unsigned nr_writes = agx_index_size_16(I->dest[d]);
80          for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
81             if (BITSET_TEST_RANGE(slots[slot].writes, I->dest[d].value,
82                                   I->dest[d].value + nr_writes - 1))
83                wait_mask |= BITSET_BIT(slot);
84          }
85       }
86 
87       /* Check for barriers */
88       if (I->op == AGX_OPCODE_THREADGROUP_BARRIER ||
89           I->op == AGX_OPCODE_MEMORY_BARRIER) {
90 
91          for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
92             if (slots[slot].nr_pending)
93                wait_mask |= BITSET_BIT(slot);
94          }
95       }
96 
97       /* Try to assign a free slot */
98       if (instr_is_async(I)) {
99          for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
100             if (slots[slot].nr_pending == 0) {
101                I->scoreboard = slot;
102                break;
103             }
104          }
105       }
106 
107       /* Check for slot overflow */
108       if (instr_is_async(I) &&
109           slots[I->scoreboard].nr_pending >= AGX_MAX_PENDING)
110          wait_mask |= BITSET_BIT(I->scoreboard);
111 
112       /* Insert the appropriate waits, clearing the slots */
113       u_foreach_bit(slot, wait_mask) {
114          agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
115          agx_wait(&b, slot);
116 
117          BITSET_ZERO(slots[slot].writes);
118          slots[slot].nr_pending = 0;
119       }
120 
121       /* Record access */
122       if (instr_is_async(I)) {
123          agx_foreach_dest(I, d) {
124             if (agx_is_null(I->dest[d]))
125                continue;
126 
127             assert(I->dest[d].type == AGX_INDEX_REGISTER);
128             BITSET_SET_RANGE(
129                slots[I->scoreboard].writes, I->dest[d].value,
130                I->dest[d].value + agx_index_size_16(I->dest[d]) - 1);
131          }
132 
133          slots[I->scoreboard].nr_pending++;
134       }
135    }
136 
137    /* If there are outstanding messages, wait for them. We don't do this for the
138     * exit block, though, since nothing else will execute in the shader so
139     * waiting is pointless.
140     */
141    if (block != agx_exit_block(ctx)) {
142       agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
143 
144       for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
145          if (slots[slot].nr_pending)
146             agx_wait(&b, slot);
147       }
148    }
149 }
150 
151 /*
152  * Assign scoreboard slots to asynchronous instructions and insert waits for the
153  * appropriate hazard tracking.
154  */
155 void
agx_insert_waits(agx_context * ctx)156 agx_insert_waits(agx_context *ctx)
157 {
158    agx_foreach_block(ctx, block) {
159       if (agx_compiler_debug & AGX_DBG_WAIT)
160          agx_insert_waits_trivial(ctx, block);
161       else
162          agx_insert_waits_local(ctx, block);
163    }
164 }
165