xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a2xx/ir2_ra.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Jonathan Marek <[email protected]>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Jonathan Marek <[email protected]>
7  */
8 
9 #include "ir2_private.h"
10 
11 /* if an instruction has side effects, we should never kill it */
12 static bool
has_side_effects(struct ir2_instr * instr)13 has_side_effects(struct ir2_instr *instr)
14 {
15    if (instr->type == IR2_CF)
16       return true;
17    else if (instr->type == IR2_FETCH)
18       return false;
19 
20    switch (instr->alu.scalar_opc) {
21    case PRED_SETEs ... KILLONEs:
22       return true;
23    default:
24       break;
25    }
26 
27    switch (instr->alu.vector_opc) {
28    case PRED_SETE_PUSHv ... KILLNEv:
29       return true;
30    default:
31       break;
32    }
33 
34    return instr->alu.export >= 0;
35 }
36 
37 /* mark an instruction as required, and all its sources recursively */
38 static void
set_need_emit(struct ir2_context * ctx,struct ir2_instr * instr)39 set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
40 {
41    struct ir2_reg *reg;
42 
43    /* don't repeat work already done */
44    if (instr->need_emit)
45       return;
46 
47    instr->need_emit = true;
48 
49    ir2_foreach_src (src, instr) {
50       switch (src->type) {
51       case IR2_SRC_SSA:
52          set_need_emit(ctx, &ctx->instr[src->num]);
53          break;
54       case IR2_SRC_REG:
55          /* slow ..  */
56          reg = get_reg_src(ctx, src);
57          ir2_foreach_instr (instr, ctx) {
58             if (!instr->is_ssa && instr->reg == reg)
59                set_need_emit(ctx, instr);
60          }
61          break;
62       default:
63          break;
64       }
65    }
66 }
67 
68 /* get current bit mask of allocated components for a register */
69 static unsigned
reg_mask(struct ir2_context * ctx,unsigned idx)70 reg_mask(struct ir2_context *ctx, unsigned idx)
71 {
72    return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
73 }
74 
75 static void
reg_setmask(struct ir2_context * ctx,unsigned idx,unsigned c)76 reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
77 {
78    idx = idx * 4 + c;
79    ctx->reg_state[idx / 32] |= 1 << idx % 32;
80 }
81 
82 static void
reg_freemask(struct ir2_context * ctx,unsigned idx,unsigned c)83 reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
84 {
85    idx = idx * 4 + c;
86    ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
87 }
88 
89 void
ra_count_refs(struct ir2_context * ctx)90 ra_count_refs(struct ir2_context *ctx)
91 {
92    struct ir2_reg *reg;
93 
94    /* mark instructions as needed
95     * need to do this because "substitutions" pass makes many movs not needed
96     */
97    ir2_foreach_instr (instr, ctx) {
98       if (has_side_effects(instr))
99          set_need_emit(ctx, instr);
100    }
101 
102    /* compute ref_counts */
103    ir2_foreach_instr (instr, ctx) {
104       /* kill non-needed so they can be skipped */
105       if (!instr->need_emit) {
106          instr->type = IR2_NONE;
107          continue;
108       }
109 
110       ir2_foreach_src (src, instr) {
111          if (src->type == IR2_SRC_CONST)
112             continue;
113 
114          reg = get_reg_src(ctx, src);
115          for (int i = 0; i < src_ncomp(instr); i++)
116             reg->comp[swiz_get(src->swizzle, i)].ref_count++;
117       }
118    }
119 }
120 
121 void
ra_reg(struct ir2_context * ctx,struct ir2_reg * reg,int force_idx,bool export,uint8_t export_writemask)122 ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
123        uint8_t export_writemask)
124 {
125    /* for export, don't allocate anything but set component layout */
126    if (export) {
127       for (int i = 0; i < 4; i++)
128          reg->comp[i].c = i;
129       return;
130    }
131 
132    unsigned idx = force_idx;
133 
134    /* TODO: allocate into the same register if theres room
135     * note: the blob doesn't do it, so verify that it is indeed better
136     * also, doing it would conflict with scalar mov insertion
137     */
138 
139    /* check if already allocated */
140    for (int i = 0; i < reg->ncomp; i++) {
141       if (reg->comp[i].alloc)
142          return;
143    }
144 
145    if (force_idx < 0) {
146       for (idx = 0; idx < 64; idx++) {
147          if (reg_mask(ctx, idx) == 0)
148             break;
149       }
150    }
151    assert(idx != 64); /* TODO ran out of register space.. */
152 
153    /* update max_reg value */
154    ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);
155 
156    unsigned mask = reg_mask(ctx, idx);
157 
158    for (int i = 0; i < reg->ncomp; i++) {
159       /* don't allocate never used values */
160       if (reg->comp[i].ref_count == 0) {
161          reg->comp[i].c = 7;
162          continue;
163       }
164 
165       /* TODO */
166       unsigned c = 1 ? i : (ffs(~mask) - 1);
167       mask |= 1 << c;
168       reg->comp[i].c = c;
169       reg_setmask(ctx, idx, c);
170       reg->comp[i].alloc = true;
171    }
172 
173    reg->idx = idx;
174    ctx->live_regs[reg->idx] = reg;
175 }
176 
177 /* reduce srcs ref_count and free if needed */
178 void
ra_src_free(struct ir2_context * ctx,struct ir2_instr * instr)179 ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
180 {
181    struct ir2_reg *reg;
182    struct ir2_reg_component *comp;
183 
184    ir2_foreach_src (src, instr) {
185       if (src->type == IR2_SRC_CONST)
186          continue;
187 
188       reg = get_reg_src(ctx, src);
189       /* XXX use before write case */
190 
191       for (int i = 0; i < src_ncomp(instr); i++) {
192          comp = &reg->comp[swiz_get(src->swizzle, i)];
193          if (!--comp->ref_count && reg->block_idx_free < 0) {
194             reg_freemask(ctx, reg->idx, comp->c);
195             comp->alloc = false;
196          }
197       }
198    }
199 }
200 
201 /* free any regs left for a block */
202 void
ra_block_free(struct ir2_context * ctx,unsigned block)203 ra_block_free(struct ir2_context *ctx, unsigned block)
204 {
205    ir2_foreach_live_reg (reg, ctx) {
206       if (reg->block_idx_free != block)
207          continue;
208 
209       for (int i = 0; i < reg->ncomp; i++) {
210          if (!reg->comp[i].alloc) /* XXX should never be true? */
211             continue;
212 
213          reg_freemask(ctx, reg->idx, reg->comp[i].c);
214          reg->comp[i].alloc = false;
215       }
216       ctx->live_regs[reg->idx] = NULL;
217    }
218 }
219