1 /*
2 * Copyright 2022 Alyssa Rosenzweig
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "agx_builder.h"
7 #include "agx_compiler.h"
8 #include "agx_debug.h"
9
/* Most messages allowed in flight on one scoreboard slot before the pass
 * forces a wait on that slot.
 */
#define AGX_MAX_PENDING (8)
11
12 /*
13 * Returns whether an instruction is asynchronous and needs a scoreboard slot
14 */
15 static bool
instr_is_async(agx_instr * I)16 instr_is_async(agx_instr *I)
17 {
18 return agx_opcodes_info[I->op].immediates & AGX_IMMEDIATE_SCOREBOARD;
19 }
20
21 struct slot {
22 /* Set of registers this slot is currently writing */
23 BITSET_DECLARE(writes, AGX_NUM_REGS);
24
25 /* Number of pending messages on this slot. Must not exceed
26 * AGX_MAX_PENDING for correct results.
27 */
28 uint8_t nr_pending;
29 };
30
31 /*
32 * Insert waits within a block to stall after every async instruction. Useful
33 * for debugging.
34 */
35 static void
agx_insert_waits_trivial(agx_context * ctx,agx_block * block)36 agx_insert_waits_trivial(agx_context *ctx, agx_block *block)
37 {
38 agx_foreach_instr_in_block_safe(block, I) {
39 if (instr_is_async(I)) {
40 agx_builder b = agx_init_builder(ctx, agx_after_instr(I));
41 agx_wait(&b, I->scoreboard);
42 }
43 }
44 }
45
46 /*
47 * Insert waits within a block, assuming scoreboard slots have already been
48 * assigned. This waits for everything at the end of the block, rather than
49 * doing something more intelligent/global. This should be optimized.
50 *
51 * XXX: Do any instructions read their sources asynchronously?
52 */
53 static void
agx_insert_waits_local(agx_context * ctx,agx_block * block)54 agx_insert_waits_local(agx_context *ctx, agx_block *block)
55 {
56 struct slot slots[2] = {0};
57
58 agx_foreach_instr_in_block_safe(block, I) {
59 uint8_t wait_mask = 0;
60
61 /* Check for read-after-write */
62 agx_foreach_src(I, s) {
63 if (I->src[s].type != AGX_INDEX_REGISTER)
64 continue;
65
66 unsigned nr_read = agx_index_size_16(I->src[s]);
67 for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
68 if (BITSET_TEST_RANGE(slots[slot].writes, I->src[s].value,
69 I->src[s].value + nr_read - 1))
70 wait_mask |= BITSET_BIT(slot);
71 }
72 }
73
74 /* Check for write-after-write */
75 agx_foreach_dest(I, d) {
76 if (I->dest[d].type != AGX_INDEX_REGISTER)
77 continue;
78
79 unsigned nr_writes = agx_index_size_16(I->dest[d]);
80 for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
81 if (BITSET_TEST_RANGE(slots[slot].writes, I->dest[d].value,
82 I->dest[d].value + nr_writes - 1))
83 wait_mask |= BITSET_BIT(slot);
84 }
85 }
86
87 /* Check for barriers */
88 if (I->op == AGX_OPCODE_THREADGROUP_BARRIER ||
89 I->op == AGX_OPCODE_MEMORY_BARRIER) {
90
91 for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
92 if (slots[slot].nr_pending)
93 wait_mask |= BITSET_BIT(slot);
94 }
95 }
96
97 /* Try to assign a free slot */
98 if (instr_is_async(I)) {
99 for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
100 if (slots[slot].nr_pending == 0) {
101 I->scoreboard = slot;
102 break;
103 }
104 }
105 }
106
107 /* Check for slot overflow */
108 if (instr_is_async(I) &&
109 slots[I->scoreboard].nr_pending >= AGX_MAX_PENDING)
110 wait_mask |= BITSET_BIT(I->scoreboard);
111
112 /* Insert the appropriate waits, clearing the slots */
113 u_foreach_bit(slot, wait_mask) {
114 agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
115 agx_wait(&b, slot);
116
117 BITSET_ZERO(slots[slot].writes);
118 slots[slot].nr_pending = 0;
119 }
120
121 /* Record access */
122 if (instr_is_async(I)) {
123 agx_foreach_dest(I, d) {
124 if (agx_is_null(I->dest[d]))
125 continue;
126
127 assert(I->dest[d].type == AGX_INDEX_REGISTER);
128 BITSET_SET_RANGE(
129 slots[I->scoreboard].writes, I->dest[d].value,
130 I->dest[d].value + agx_index_size_16(I->dest[d]) - 1);
131 }
132
133 slots[I->scoreboard].nr_pending++;
134 }
135 }
136
137 /* If there are outstanding messages, wait for them. We don't do this for the
138 * exit block, though, since nothing else will execute in the shader so
139 * waiting is pointless.
140 */
141 if (block != agx_exit_block(ctx)) {
142 agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
143
144 for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
145 if (slots[slot].nr_pending)
146 agx_wait(&b, slot);
147 }
148 }
149 }
150
151 /*
152 * Assign scoreboard slots to asynchronous instructions and insert waits for the
153 * appropriate hazard tracking.
154 */
155 void
agx_insert_waits(agx_context * ctx)156 agx_insert_waits(agx_context *ctx)
157 {
158 agx_foreach_block(ctx, block) {
159 if (agx_compiler_debug & AGX_DBG_WAIT)
160 agx_insert_waits_trivial(ctx, block);
161 else
162 agx_insert_waits_local(ctx, block);
163 }
164 }
165