/*
 * Copyright © 2018 Jonathan Marek <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Jonathan Marek <[email protected]>
 */

#include "ir2_private.h"

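/* Register allocation: ra_count_refs kills instructions that have no side
 * effects and are never consumed, then counts per-component references;
 * ra_reg assigns registers using a per-component bitmask; ra_src_free and
 * ra_block_free release components once their ref_counts reach zero.
 */
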
/* if an instruction has side effects, we should never kill it */
static bool
has_side_effects(struct ir2_instr *instr)
{
   if (instr->type == IR2_CF)
      return true;
   else if (instr->type == IR2_FETCH)
      return false;

   switch (instr->alu.scalar_opc) {
   case PRED_SETEs ... KILLONEs:
      return true;
   default:
      break;
   }

   switch (instr->alu.vector_opc) {
   case PRED_SETE_PUSHv ... KILLNEv:
      return true;
   default:
      break;
   }

   return instr->alu.export >= 0;
}
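
/* Dead-code elimination is mark-and-sweep: instructions flagged by
 * has_side_effects are the roots, set_need_emit marks everything they
 * depend on, and ra_count_refs sweeps whatever stayed unmarked.
 */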

/* mark an instruction as required, and all its sources recursively */
static void
set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
{
   struct ir2_reg *reg;

   /* don't repeat work already done */
   if (instr->need_emit)
      return;

   instr->need_emit = true;

   ir2_foreach_src (src, instr) {
      switch (src->type) {
      case IR2_SRC_SSA:
         set_need_emit(ctx, &ctx->instr[src->num]);
         break;
      case IR2_SRC_REG:
         /* slow: scan every instruction for writes to this reg */
         reg = get_reg_src(ctx, src);
         ir2_foreach_instr (instr, ctx) {
            if (!instr->is_ssa && instr->reg == reg)
               set_need_emit(ctx, instr);
         }
         break;
      default:
         break;
      }
   }
}

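/* reg_state is a flat bitmask with one bit per register component: 4 bits
 * per register, so 8 registers per 32-bit word.  e.g. component 2 of
 * register 5 is bit 22 of reg_state[0], and reg_mask(ctx, 5) reads bits
 * 20..23.
 */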
/* get current bit mask of allocated components for a register */
static unsigned
reg_mask(struct ir2_context *ctx, unsigned idx)
{
   return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
}

/* mark component c of register idx as allocated */
static void
reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
   idx = idx * 4 + c;
   ctx->reg_state[idx / 32] |= 1 << idx % 32;
}

/* mark component c of register idx as free */
static void
reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
   idx = idx * 4 + c;
   ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
}

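/* first RA pass: sweep dead instructions and count, for each register
 * component, how many source operands read it (through their swizzles),
 * so later passes can free a component as soon as its count hits zero.
 */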
void
ra_count_refs(struct ir2_context *ctx)
{
   struct ir2_reg *reg;

   /* mark instructions as needed
    * necessary because the "substitutions" pass makes many movs unneeded
    */
   ir2_foreach_instr (instr, ctx) {
      if (has_side_effects(instr))
         set_need_emit(ctx, instr);
   }

   /* compute ref_counts */
   ir2_foreach_instr (instr, ctx) {
      /* kill non-needed so they can be skipped */
      if (!instr->need_emit) {
         instr->type = IR2_NONE;
         continue;
      }

      ir2_foreach_src (src, instr) {
         if (src->type == IR2_SRC_CONST)
            continue;

         reg = get_reg_src(ctx, src);
         for (int i = 0; i < src_ncomp(instr); i++)
            reg->comp[swiz_get(src->swizzle, i)].ref_count++;
      }
   }
}

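/* Allocation strategy: unless force_idx is given, take the first of the 64
 * registers whose 4-bit mask is entirely free, then map value component i
 * to register component i; components that are never read keep ref_count 0
 * and are left unallocated.
 */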
void
ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
       uint8_t export_writemask)
{
   /* for export, don't allocate anything but set component layout */
   if (export) {
      for (int i = 0; i < 4; i++)
         reg->comp[i].c = i;
      return;
   }

   unsigned idx = force_idx;

   /* TODO: allocate into the same register if there's room
    * note: the blob doesn't do it, so verify that it is indeed better
    * also, doing it would conflict with scalar mov insertion
    */

   /* check if already allocated */
   for (int i = 0; i < reg->ncomp; i++) {
      if (reg->comp[i].alloc)
         return;
   }

   if (force_idx < 0) {
      for (idx = 0; idx < 64; idx++) {
         if (reg_mask(ctx, idx) == 0)
            break;
      }
   }
   assert(idx != 64); /* TODO: ran out of register space.. */

   /* update max_reg value */
   ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);

   unsigned mask = reg_mask(ctx, idx);

   for (int i = 0; i < reg->ncomp; i++) {
      /* don't allocate never-used values */
      if (reg->comp[i].ref_count == 0) {
         reg->comp[i].c = 7;
         continue;
      }

      /* TODO: pack into free components (ffs(~mask) - 1) instead of the
       * 1:1 mapping always taken here
       */
      unsigned c = 1 ? i : (ffs(~mask) - 1);
      mask |= 1 << c;
      reg->comp[i].c = c;
      reg_setmask(ctx, idx, c);
      reg->comp[i].alloc = true;
   }

   reg->idx = idx;
   ctx->live_regs[reg->idx] = reg;
}

/* reduce srcs ref_count and free if needed */
void
ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
{
   struct ir2_reg *reg;
   struct ir2_reg_component *comp;

   ir2_foreach_src (src, instr) {
      if (src->type == IR2_SRC_CONST)
         continue;

      reg = get_reg_src(ctx, src);
      /* XXX use before write case */

      for (int i = 0; i < src_ncomp(instr); i++) {
         comp = &reg->comp[swiz_get(src->swizzle, i)];
         if (!--comp->ref_count && reg->block_idx_free < 0) {
            reg_freemask(ctx, reg->idx, comp->c);
            comp->alloc = false;
         }
      }
   }
}
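
/* regs with block_idx_free set are deliberately skipped by the per-source
 * freeing in ra_src_free above; they stay allocated until ra_block_free
 * runs for their block.
 */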

/* free any regs left for a block */
void
ra_block_free(struct ir2_context *ctx, unsigned block)
{
   ir2_foreach_live_reg (reg, ctx) {
      if (reg->block_idx_free != block)
         continue;

      for (int i = 0; i < reg->ncomp; i++) {
         if (!reg->comp[i].alloc) /* XXX should never be true? */
            continue;

         reg_freemask(ctx, reg->idx, reg->comp[i].c);
         reg->comp[i].alloc = false;
      }
      ctx->live_regs[reg->idx] = NULL;
   }
}