xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/valhall/va_mark_last.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2022 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "va_compiler.h"
25 #include "valhall_enums.h"
26 
27 /*
 * Valhall sources may be marked as the last use of a register, according
29  * to the following rules:
30  *
31  * 1. The last use of a register should be marked allowing the hardware
32  *    to elide register writes.
33  * 2. Staging sources may be read at any time before the asynchronous
34  *    instruction completes. If a register is used as both a staging source and
35  *    a regular source, the regular source cannot be marked until the program
36  *    waits for the asynchronous instruction.
37  * 3. Marking a register pair marks both registers in the pair.
38  *
39  * Last use information follows immediately from (post-RA) liveness analysis:
40  * a register is dead immediately after its last use.
41  *
42  * Staging information follows from scoreboard analysis: do not mark registers
43  * that are read by a pending asynchronous instruction. Note that the Valhall
44  * scoreboard analysis does not track reads, so we handle that with our own
45  * (simplified) scoreboard analysis.
46  *
47  * Register pairs are marked conservatively: if either register in a pair cannot
48  * be marked, do not mark either register.
49  */
50 
51 static uint64_t
bi_staging_read_mask(const bi_instr * I)52 bi_staging_read_mask(const bi_instr *I)
53 {
54    uint64_t mask = 0;
55 
56    bi_foreach_src(I, s) {
57       if (bi_is_staging_src(I, s) && !bi_is_null(I->src[s])) {
58          assert(I->src[s].type == BI_INDEX_REGISTER);
59          unsigned reg = I->src[s].value;
60          unsigned count = bi_count_read_registers(I, s);
61 
62          mask |= (BITFIELD64_MASK(count) << reg);
63       }
64    }
65 
66    return mask;
67 }
68 
69 static bool
bi_writes_reg(const bi_instr * I,unsigned reg)70 bi_writes_reg(const bi_instr *I, unsigned reg)
71 {
72    bi_foreach_dest(I, d) {
73       assert(I->dest[d].type == BI_INDEX_REGISTER);
74 
75       unsigned count = bi_count_write_registers(I, d);
76 
77       if (reg >= I->dest[d].value && (reg - I->dest[d].value) < count)
78          return true;
79    }
80 
81    return false;
82 }
83 
84 static unsigned
waits_on_slot(enum va_flow flow,unsigned slot)85 waits_on_slot(enum va_flow flow, unsigned slot)
86 {
87    return (flow == VA_FLOW_WAIT) || (flow == VA_FLOW_WAIT0126) ||
88           (va_flow_is_wait_or_none(flow) && (flow & BITFIELD_BIT(slot)));
89 }
90 
91 static void
scoreboard_update(struct bi_scoreboard_state * st,const bi_instr * I)92 scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I)
93 {
94    /* Mark read staging registers */
95    st->read[I->slot] |= bi_staging_read_mask(I);
96 
97    /* Unmark registers after they are waited on */
98    for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) {
99       if (waits_on_slot(I->flow, i))
100          st->read[i] = 0;
101    }
102 }
103 
104 static void
va_analyze_scoreboard_reads(bi_context * ctx)105 va_analyze_scoreboard_reads(bi_context *ctx)
106 {
107    u_worklist worklist;
108    bi_worklist_init(ctx, &worklist);
109 
110    bi_foreach_block(ctx, block) {
111       bi_worklist_push_tail(&worklist, block);
112 
113       /* Reset analysis from previous pass */
114       block->scoreboard_in = (struct bi_scoreboard_state){0};
115       block->scoreboard_out = (struct bi_scoreboard_state){0};
116    }
117 
118    /* Perform forward data flow analysis to calculate dependencies */
119    while (!u_worklist_is_empty(&worklist)) {
120       /* Pop from the front for forward analysis */
121       bi_block *blk = bi_worklist_pop_head(&worklist);
122 
123       bi_foreach_predecessor(blk, pred) {
124          for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i)
125             blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
126       }
127 
128       struct bi_scoreboard_state state = blk->scoreboard_in;
129 
130       bi_foreach_instr_in_block(blk, I)
131          scoreboard_update(&state, I);
132 
133       /* If there was progress, reprocess successors */
134       if (memcmp(&state, &blk->scoreboard_out, sizeof(state)) != 0) {
135          bi_foreach_successor(blk, succ)
136             bi_worklist_push_tail(&worklist, succ);
137       }
138 
139       blk->scoreboard_out = state;
140    }
141 
142    u_worklist_fini(&worklist);
143 }
144 
145 void
va_mark_last(bi_context * ctx)146 va_mark_last(bi_context *ctx)
147 {
148    /* Analyze the shader globally */
149    bi_postra_liveness(ctx);
150    va_analyze_scoreboard_reads(ctx);
151 
152    bi_foreach_block(ctx, block) {
153       uint64_t live = block->reg_live_out;
154 
155       /* Mark all last uses */
156       bi_foreach_instr_in_block_rev(block, I) {
157          bi_foreach_src(I, s) {
158             if (I->src[s].type != BI_INDEX_REGISTER)
159                continue;
160 
161             unsigned nr = bi_count_read_registers(I, s);
162             uint64_t mask = BITFIELD64_MASK(nr) << I->src[s].value;
163 
164             /* If the register dead after this instruction, it's the last use */
165             I->src[s].discard = (live & mask) == 0;
166 
167             /* If the register is overwritten this cycle, it is implicitly
168              * discarded, but that won't show up in the liveness analysis.
169              */
170             I->src[s].discard |= bi_writes_reg(I, I->src[s].value);
171          }
172 
173          live = bi_postra_liveness_ins(live, I);
174       }
175 
176       struct bi_scoreboard_state st = block->scoreboard_in;
177 
178       bi_foreach_instr_in_block(block, I) {
179          /* Unmark registers read by a pending async instruction */
180          bi_foreach_src(I, s) {
181             if (!I->src[s].discard)
182                continue;
183 
184             assert(I->src[s].type == BI_INDEX_REGISTER);
185 
186             uint64_t pending_regs = st.read[0] | st.read[1] | st.read[2];
187             bool pending = (pending_regs & BITFIELD64_BIT(I->src[s].value));
188 
189             if (bi_is_staging_src(I, s) || pending)
190                I->src[s].discard = false;
191          }
192 
193          /* Unmark register pairs where one half must be preserved */
194          bi_foreach_src(I, s) {
195             /* Only look for "real" architectural registers */
196             if (s >= 3)
197                break;
198 
199             if (va_src_info(I->op, s).size == VA_SIZE_64) {
200                bool both_discard = I->src[s].discard && I->src[s + 1].discard;
201 
202                I->src[s + 0].discard = both_discard;
203                I->src[s + 1].discard = both_discard;
204             }
205          }
206 
207          scoreboard_update(&st, I);
208       }
209    }
210 }
211