/*
 * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/u_math.h"
#include "util/u_memory.h"
#include "compiler.h"
#include "midgard_ops.h"
#include "midgard_quirks.h"

struct phys_reg {
   /* Physical register: 0-31 */
   unsigned reg;

   /* Byte offset into the physical register: 0-15 */
   unsigned offset;

   /* log2(bytes per component) for fast mul/div */
   unsigned shift;
};

/* Shift up by reg_offset and horizontally by dst_offset. */

static void
offset_swizzle(unsigned *swizzle, unsigned reg_offset, unsigned srcshift,
               unsigned dstshift, unsigned dst_offset)
{
   unsigned out[MIR_VEC_COMPONENTS];

   signed reg_comp = reg_offset >> srcshift;
   signed dst_comp = dst_offset >> dstshift;

   unsigned max_component = (16 >> srcshift) - 1;

   assert(reg_comp << srcshift == reg_offset);
   assert(dst_comp << dstshift == dst_offset);
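
   /* e.g. for a 32-bit source (srcshift = 2) at reg_offset = 8 with
    * dst_offset = 0: reg_comp = 2 and max_component = 3, so an identity
    * .xyzw swizzle becomes .zwww after the shift-and-clamp below. */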

   for (signed c = 0; c < MIR_VEC_COMPONENTS; ++c) {
      signed comp = MAX2(c - dst_comp, 0);
      out[c] = MIN2(swizzle[comp] + reg_comp, max_component);
   }

   memcpy(swizzle, out, sizeof(out));
}

/* Helper to return the default phys_reg for a given register */

static struct phys_reg
default_phys_reg(int reg, unsigned shift)
{
   struct phys_reg r = {
      .reg = reg,
      .offset = 0,
      .shift = shift,
   };

   return r;
}

/* Determine which physical register, swizzle, and mask a virtual
 * register corresponds to */

static struct phys_reg
index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg,
             unsigned shift)
{
   /* Check for special cases */
   if (reg == ~0)
      return default_phys_reg(REGISTER_UNUSED, shift);
   else if (reg >= SSA_FIXED_MINIMUM)
      return default_phys_reg(SSA_REG_FROM_FIXED(reg), shift);
   else if (!l)
      return default_phys_reg(REGISTER_UNUSED, shift);

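   /* An LCRA solution is a byte address into the register file: dividing
    * by 16 gives the 16-byte register, and the low four bits give the byte
    * offset within it. */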
   struct phys_reg r = {
      .reg = l->solutions[reg] / 16,
      .offset = l->solutions[reg] & 0xF,
      .shift = shift,
   };

   /* Report that we actually use this register, and return it */

   if (r.reg < 16)
      ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);

   return r;
}

static void
set_class(unsigned *classes, unsigned node, unsigned class)
{
   if (node < SSA_FIXED_MINIMUM && class != classes[node]) {
      assert(classes[node] == REG_CLASS_WORK);
      classes[node] = class;
   }
}

/* Special register classes impose special constraints on who can read their
 * values, so check that */

static bool ASSERTED
check_read_class(unsigned *classes, unsigned tag, unsigned node)
{
   /* Non-nodes are implicitly ok */
   if (node >= SSA_FIXED_MINIMUM)
      return true;

   switch (classes[node]) {
   case REG_CLASS_LDST:
      return (tag == TAG_LOAD_STORE_4);
   case REG_CLASS_TEXR:
      return (tag == TAG_TEXTURE_4);
   case REG_CLASS_TEXW:
      return (tag != TAG_LOAD_STORE_4);
   case REG_CLASS_WORK:
      return IS_ALU(tag);
   default:
      unreachable("Invalid class");
   }
}

static bool ASSERTED
check_write_class(unsigned *classes, unsigned tag, unsigned node)
{
   /* Non-nodes are implicitly ok */
   if (node >= SSA_FIXED_MINIMUM)
      return true;

   switch (classes[node]) {
   case REG_CLASS_TEXR:
      return true;
   case REG_CLASS_TEXW:
      return (tag == TAG_TEXTURE_4);
   case REG_CLASS_LDST:
   case REG_CLASS_WORK:
      return IS_ALU(tag) || (tag == TAG_LOAD_STORE_4);
   default:
      unreachable("Invalid class");
   }
}

/* Prepass before RA to ensure special class restrictions are met. The idea is
 * to create a bit field of types of instructions that read a particular index.
 * Later, we'll add moves as appropriate and rewrite to specialize by type. */
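/* For example, an index read by both ALU and texture instructions gets a copy
 * emitted for the texture reads, so each resulting index ends up used by
 * exactly one class. */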

static void
mark_node_class(unsigned *bitfield, unsigned node)
{
   if (node < SSA_FIXED_MINIMUM)
      BITSET_SET(bitfield, node);
}

void
mir_lower_special_reads(compiler_context *ctx)
{
   mir_compute_temp_count(ctx);
   size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD);

   /* Bitfields for the various types of registers we could have. aluw can
    * be written by either ALU or load/store */

   unsigned *alur = calloc(sz, 1);
   unsigned *aluw = calloc(sz, 1);
   unsigned *brar = calloc(sz, 1);
   unsigned *ldst = calloc(sz, 1);
   unsigned *texr = calloc(sz, 1);
   unsigned *texw = calloc(sz, 1);

   /* Pass #1 is analysis, a linear scan to fill out the bitfields */

   mir_foreach_instr_global(ctx, ins) {
      switch (ins->type) {
      case TAG_ALU_4:
         mark_node_class(aluw, ins->dest);
         mark_node_class(alur, ins->src[0]);
         mark_node_class(alur, ins->src[1]);
         mark_node_class(alur, ins->src[2]);

         if (ins->compact_branch && ins->writeout)
            mark_node_class(brar, ins->src[0]);

         break;

      case TAG_LOAD_STORE_4:
         mark_node_class(aluw, ins->dest);
         mark_node_class(ldst, ins->src[0]);
         mark_node_class(ldst, ins->src[1]);
         mark_node_class(ldst, ins->src[2]);
         mark_node_class(ldst, ins->src[3]);
         break;

      case TAG_TEXTURE_4:
         mark_node_class(texr, ins->src[0]);
         mark_node_class(texr, ins->src[1]);
         mark_node_class(texr, ins->src[2]);
         mark_node_class(texw, ins->dest);
         break;

      default:
         break;
      }
   }

   /* Pass #2 is lowering now that we've analyzed all the classes.
    * Conceptually, if an index is only marked for a single type of use,
    * there is nothing to lower. If it is marked for different uses, we
    * split up based on the number of types of uses. To do so, we divide
    * into N distinct classes of use (where N>1 by definition), emit N-1
    * moves from the index to copies of the index, and finally rewrite N-1
    * of the types of uses to use the corresponding move */

   unsigned spill_idx = ctx->temp_count;

   for (unsigned i = 0; i < ctx->temp_count; ++i) {
      bool is_alur = BITSET_TEST(alur, i);
      bool is_aluw = BITSET_TEST(aluw, i);
      bool is_brar = BITSET_TEST(brar, i);
      bool is_ldst = BITSET_TEST(ldst, i);
      bool is_texr = BITSET_TEST(texr, i);
      bool is_texw = BITSET_TEST(texw, i);

      /* Analyse to check how many distinct uses there are. ALU ops
       * (alur) can read the results of the texture pipeline (texw)
       * but not ldst or texr. Load/store ops (ldst) cannot read
       * anything but load/store inputs. Texture pipeline cannot read
       * anything but texture inputs. TODO: Simplify. */

      bool collision = (is_alur && (is_ldst || is_texr)) ||
                       (is_ldst && (is_alur || is_texr || is_texw)) ||
                       (is_texr && (is_alur || is_ldst || is_texw)) ||
                       (is_texw && (is_aluw || is_ldst || is_texr)) ||
                       (is_brar && is_texw);

      if (!collision)
         continue;

      /* Use the index as-is as the work copy. Emit copies for
       * special uses */

      unsigned classes[] = {TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4,
                            TAG_ALU_4};
      bool collisions[] = {is_ldst, is_texr, is_texw && is_aluw, is_brar};
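
      /* Each entry above pairs an instruction tag with the hazard it
       * covers: load/store reads, texture reads, texture writes colliding
       * with ALU writes, and branch/writeout reads. */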

      for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) {
         if (!collisions[j])
            continue;

         /* When the hazard is from reading, we move and rewrite
          * sources (typical case). When it's from writing, we
          * flip the move and rewrite destinations (obscure,
          * only from control flow -- impossible in SSA) */

         bool hazard_write = (j == 2);

         unsigned idx = spill_idx++;

         /* Insert move before each read/write, depending on the
          * hazard we're trying to account for */

         mir_foreach_block(ctx, block_) {
            midgard_block *block = (midgard_block *)block_;
            midgard_instruction *mov = NULL;

            mir_foreach_instr_in_block_safe(block, pre_use) {
               if (pre_use->type != classes[j])
                  continue;

               if (hazard_write) {
                  if (pre_use->dest != i)
                     continue;

                  midgard_instruction m = v_mov(idx, i);
                  m.dest_type = pre_use->dest_type;
                  m.src_types[1] = m.dest_type;
                  m.mask = pre_use->mask;

                  midgard_instruction *use = mir_next_op(pre_use);
                  assert(use);
                  mir_insert_instruction_before(ctx, use, m);
                  mir_rewrite_index_dst_single(pre_use, i, idx);
               } else {
                  if (!mir_has_arg(pre_use, i))
                     continue;

                  unsigned mask = mir_from_bytemask(
                     mir_round_bytemask_up(
                        mir_bytemask_of_read_components(pre_use, i), 32),
                     32);

                  if (mov == NULL || !mir_is_ssa(i)) {
                     midgard_instruction m = v_mov(i, spill_idx++);
                     m.mask = mask;
                     mov = mir_insert_instruction_before(ctx, pre_use, m);
                  } else {
                     mov->mask |= mask;
                  }

                  mir_rewrite_index_src_single(pre_use, i, mov->dest);
               }
            }
         }
      }
   }

   free(alur);
   free(aluw);
   free(brar);
   free(ldst);
   free(texr);
   free(texw);
}

static void
mir_compute_interference(compiler_context *ctx, struct lcra_state *l)
{
   /* First, we need liveness information to be computed per block */
   mir_compute_liveness(ctx);

   /* We need to force r1.w live throughout a blend shader */

   if (ctx->inputs->is_blend) {
      unsigned r1w = ~0;

      mir_foreach_block(ctx, _block) {
         midgard_block *block = (midgard_block *)_block;
         mir_foreach_instr_in_block_rev(block, ins) {
            if (ins->writeout)
               r1w = ins->dest;
         }

         if (r1w != ~0)
            break;
      }

      mir_foreach_instr_global(ctx, ins) {
         if (ins->dest < ctx->temp_count)
            lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w,
                                       0xF);
      }
   }

   /* Now that every block has live_in/live_out computed, we can determine
    * interference by walking each block linearly. Take live_out at the
    * end of each block and walk the block backwards. */

   mir_foreach_block(ctx, _blk) {
      midgard_block *blk = (midgard_block *)_blk;

      /* The scalar and vector units run in parallel. We need to make
       * sure they don't write to same portion of the register file
       * otherwise the result is undefined. Add interferences to
       * avoid this situation.
       */
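      /* e.g. a VMUL and an SADD instruction in the same bundle execute
       * concurrently, so their destinations are marked as interfering
       * below to keep them from sharing bytes. */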
      util_dynarray_foreach(&blk->bundles, midgard_bundle, bundle) {
         midgard_instruction *instrs[2][4];
         unsigned instr_count[2] = {0, 0};

         for (unsigned i = 0; i < bundle->instruction_count; i++) {
            if (bundle->instructions[i]->unit == UNIT_VMUL ||
                bundle->instructions[i]->unit == UNIT_SADD)
               instrs[0][instr_count[0]++] = bundle->instructions[i];
            else
               instrs[1][instr_count[1]++] = bundle->instructions[i];
         }

         for (unsigned i = 0; i < ARRAY_SIZE(instr_count); i++) {
            for (unsigned j = 0; j < instr_count[i]; j++) {
               midgard_instruction *ins_a = instrs[i][j];

               if (ins_a->dest >= ctx->temp_count)
                  continue;

               for (unsigned k = j + 1; k < instr_count[i]; k++) {
                  midgard_instruction *ins_b = instrs[i][k];

                  if (ins_b->dest >= ctx->temp_count)
                     continue;

                  lcra_add_node_interference(l, ins_b->dest,
                                             mir_bytemask(ins_b), ins_a->dest,
                                             mir_bytemask(ins_a));
               }
            }
         }
      }

      uint16_t *live =
         mem_dup(_blk->live_out, ctx->temp_count * sizeof(uint16_t));

      mir_foreach_instr_in_block_rev(blk, ins) {
         /* Mark all registers live after the instruction as
          * interfering with the destination */

         unsigned dest = ins->dest;

         if (dest < ctx->temp_count) {
            for (unsigned i = 0; i < ctx->temp_count; ++i) {
               if (live[i]) {
                  unsigned mask = mir_bytemask(ins);
                  lcra_add_node_interference(l, dest, mask, i, live[i]);
               }
            }
         }

         /* Add blend shader interference: blend shaders might
          * clobber r0-r3. */
         if (ins->compact_branch && ins->writeout) {
            for (unsigned i = 0; i < ctx->temp_count; ++i) {
               if (!live[i])
                  continue;

               for (unsigned j = 0; j < 4; j++) {
                  lcra_add_node_interference(l, ctx->temp_count + j, 0xFFFF, i,
                                             live[i]);
               }
            }
         }

         /* Update live_in */
         mir_liveness_ins_update(live, ins, ctx->temp_count);
      }

      free(live);
   }
}

static bool
mir_is_64(midgard_instruction *ins)
{
   if (nir_alu_type_get_type_size(ins->dest_type) == 64)
      return true;

   mir_foreach_src(ins, v) {
      if (nir_alu_type_get_type_size(ins->src_types[v]) == 64)
         return true;
   }

   return false;
}

/*
 * Determine if a shader needs a contiguous workgroup. This impacts register
 * allocation. TODO: Optimize if barriers and local memory are unused.
 */
static bool
needs_contiguous_workgroup(compiler_context *ctx)
{
   return gl_shader_stage_uses_workgroup(ctx->stage);
}

/*
 * Determine an upper-bound on the number of threads in a workgroup. The GL
 * driver reports 128 for the maximum number of threads (the minimum-maximum in
 * OpenGL ES 3.1), so we pessimistically assume 128 threads for variable
 * workgroups.
 */
static unsigned
max_threads_per_workgroup(compiler_context *ctx)
{
   if (ctx->nir->info.workgroup_size_variable) {
      return 128;
   } else {
      return ctx->nir->info.workgroup_size[0] *
             ctx->nir->info.workgroup_size[1] *
             ctx->nir->info.workgroup_size[2];
   }
}

/*
 * Calculate the maximum number of work registers available to the shader.
 * Architecturally, Midgard shaders may address up to 16 work registers, but
 * various features impose other limits:
 *
 * 1. Blend shaders are limited to 8 registers by ABI.
 * 2. If there are more than 8 register-mapped uniforms, then additional
 *    register-mapped uniforms use space that otherwise would be used for work
 *    registers.
 * 3. If more than 4 registers are used, at most 128 threads may be spawned. If
 *    more than 8 registers are used, at most 64 threads may be spawned. These
 *    limits are architecturally visible in compute kernels that require an
 *    entire workgroup to be spawned at once (for barriers or local memory to
 *    work properly).
 */
static unsigned
max_work_registers(compiler_context *ctx)
{
   if (ctx->inputs->is_blend)
      return 8;

   unsigned rmu_vec4 = ctx->info->push.count / 4;
   unsigned max_work_registers = (rmu_vec4 >= 8) ? (24 - rmu_vec4) : 16;
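   /* e.g. with 12 register-mapped uniform vec4s, only 24 - 12 = 12 work
    * registers remain; with 8 or fewer, all 16 are available. */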

   if (needs_contiguous_workgroup(ctx)) {
      unsigned threads = max_threads_per_workgroup(ctx);
      assert(threads <= 128 && "maximum threads in ABI exceeded");

      if (threads > 64)
         max_work_registers = MIN2(max_work_registers, 8);
   }

   return max_work_registers;
}

/* This routine performs the actual register allocation. It should be succeeded
 * by install_registers */

static struct lcra_state *
allocate_registers(compiler_context *ctx, bool *spilled)
{
   int work_count = max_work_registers(ctx);

   /* No register allocation to do with no SSA */
   mir_compute_temp_count(ctx);
   if (!ctx->temp_count)
      return NULL;

   /* Initialize LCRA. Allocate extra node at the end for r1-r3 for
    * interference */

   struct lcra_state *l = lcra_alloc_equations(ctx->temp_count + 4, 5);
   unsigned node_r1 = ctx->temp_count + 1;

   /* Starts of classes, in bytes */
   l->class_start[REG_CLASS_WORK] = 16 * 0;
   l->class_start[REG_CLASS_LDST] = 16 * 26;
   l->class_start[REG_CLASS_TEXR] = 16 * 28;
   l->class_start[REG_CLASS_TEXW] = 16 * 28;

   l->class_size[REG_CLASS_WORK] = 16 * work_count;
   l->class_size[REG_CLASS_LDST] = 16 * 2;
   l->class_size[REG_CLASS_TEXR] = 16 * 2;
   l->class_size[REG_CLASS_TEXW] = 16 * 2;
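   /* With the layout above, load/store registers alias r26-r27 and texture
    * registers alias r28-r29 (each register is 16 bytes); work registers
    * occupy r0 up to r(work_count - 1). */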

   lcra_set_disjoint_class(l, REG_CLASS_TEXR, REG_CLASS_TEXW);

   /* To save space on T*20, we don't have real texture registers.
    * Instead, tex inputs reuse the load/store pipeline registers, and
    * tex outputs use work r0/r1. Note we still use TEXR/TEXW classes,
    * noting that this handles interferences and sizes correctly. */

   if (ctx->quirks & MIDGARD_INTERPIPE_REG_ALIASING) {
      l->class_start[REG_CLASS_TEXR] = l->class_start[REG_CLASS_LDST];
      l->class_start[REG_CLASS_TEXW] = l->class_start[REG_CLASS_WORK];
   }

   unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count);
   unsigned *min_alignment = calloc(sizeof(unsigned), ctx->temp_count);
   unsigned *min_bound = calloc(sizeof(unsigned), ctx->temp_count);

   mir_foreach_instr_global(ctx, ins) {
      /* Swizzles of 32-bit sources on 64-bit instructions need to be
       * aligned to either bottom (xy) or top (zw). More general
       * swizzle lowering should happen prior to scheduling (TODO),
       * but once we get RA we shouldn't disrupt this further. Align
       * sources of 64-bit instructions. */

      if (ins->type == TAG_ALU_4 && mir_is_64(ins)) {
         mir_foreach_src(ins, v) {
            unsigned s = ins->src[v];

            if (s < ctx->temp_count)
               min_alignment[s] = MAX2(3, min_alignment[s]);
         }
      }

      if (ins->type == TAG_LOAD_STORE_4 && OP_HAS_ADDRESS(ins->op)) {
         mir_foreach_src(ins, v) {
            unsigned s = ins->src[v];
            unsigned size = nir_alu_type_get_type_size(ins->src_types[v]);

            if (s < ctx->temp_count)
               min_alignment[s] = MAX2((size == 64) ? 3 : 2, min_alignment[s]);
         }
      }

      /* Anything read as 16-bit needs proper alignment to ensure the
       * resulting code can be packed.
       */
      mir_foreach_src(ins, s) {
         unsigned src_size = nir_alu_type_get_type_size(ins->src_types[s]);
         if (src_size == 16 && ins->src[s] < SSA_FIXED_MINIMUM)
            min_bound[ins->src[s]] = MAX2(min_bound[ins->src[s]], 8);
      }

      /* Everything after this concerns only the destination, not the
       * sources.
       */
      if (ins->dest >= SSA_FIXED_MINIMUM)
         continue;

      unsigned size = nir_alu_type_get_type_size(ins->dest_type);

      if (ins->is_pack)
         size = 32;

      /* 0 for x, 1 for xy, 2 for xyz, 3 for xyzw */
      int comps1 = util_logbase2(ins->mask);

      int bytes = (comps1 + 1) * (size / 8);

      /* Use the largest class if there's ambiguity, this
       * handles partial writes */

      int dest = ins->dest;
      found_class[dest] = MAX2(found_class[dest], bytes);

      min_alignment[dest] =
         MAX2(min_alignment[dest],
              (size == 16) ? 1 :   /* (1 << 1) = 2-byte */
              (size == 32) ? 2 :   /* (1 << 2) = 4-byte */
              (size == 64) ? 3 :   /* (1 << 3) = 8-byte */
              3);                  /* 8-bit todo */

      /* We can't cross xy/zw boundaries. TODO: vec8 can */
      if (size == 16 && min_alignment[dest] != 4)
         min_bound[dest] = 8;

      /* We don't have a swizzle for the conditional and we don't
       * want to muck with the conditional itself, so just force
       * alignment for now */

      if (ins->type == TAG_ALU_4 && OP_IS_CSEL_V(ins->op)) {
         min_alignment[dest] = 4; /* 1 << 4 = 16-byte = vec4 */

         /* LCRA assumes bound >= alignment */
         min_bound[dest] = 16;
      }

      /* Since ld/st swizzles and masks are 32-bit only, we need them
       * aligned to enable final packing */
      if (ins->type == TAG_LOAD_STORE_4)
         min_alignment[dest] = MAX2(min_alignment[dest], 2);
   }

   for (unsigned i = 0; i < ctx->temp_count; ++i) {
      lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2,
                         min_bound[i] ? min_bound[i] : 16);
      lcra_restrict_range(l, i, found_class[i]);
   }

   free(found_class);
   free(min_alignment);
   free(min_bound);

   /* Next, we'll determine semantic class. We default to zero (work).
    * But, if we're used with a special operation, that will force us to a
    * particular class. Each node must be assigned to exactly one class; a
    * prepass before RA should have lowered what-would-have-been
    * multiclass nodes into a series of moves to break it up into multiple
    * nodes (TODO) */

   mir_foreach_instr_global(ctx, ins) {
      /* Check if this operation imposes any classes */

      if (ins->type == TAG_LOAD_STORE_4) {
         set_class(l->class, ins->src[0], REG_CLASS_LDST);
         set_class(l->class, ins->src[1], REG_CLASS_LDST);
         set_class(l->class, ins->src[2], REG_CLASS_LDST);
         set_class(l->class, ins->src[3], REG_CLASS_LDST);

         if (OP_IS_VEC4_ONLY(ins->op)) {
            lcra_restrict_range(l, ins->dest, 16);
            lcra_restrict_range(l, ins->src[0], 16);
            lcra_restrict_range(l, ins->src[1], 16);
            lcra_restrict_range(l, ins->src[2], 16);
            lcra_restrict_range(l, ins->src[3], 16);
         }
      } else if (ins->type == TAG_TEXTURE_4) {
         set_class(l->class, ins->dest, REG_CLASS_TEXW);
         set_class(l->class, ins->src[0], REG_CLASS_TEXR);
         set_class(l->class, ins->src[1], REG_CLASS_TEXR);
         set_class(l->class, ins->src[2], REG_CLASS_TEXR);
         set_class(l->class, ins->src[3], REG_CLASS_TEXR);
      }
   }

   /* Check that the semantics of the class are respected */
   mir_foreach_instr_global(ctx, ins) {
      assert(check_write_class(l->class, ins->type, ins->dest));
      assert(check_read_class(l->class, ins->type, ins->src[0]));
      assert(check_read_class(l->class, ins->type, ins->src[1]));
      assert(check_read_class(l->class, ins->type, ins->src[2]));
      assert(check_read_class(l->class, ins->type, ins->src[3]));
   }

   /* Mark writeout to r0, depth to r1.x, stencil to r1.y,
    * render target to r1.z, unknown to r1.w */
   mir_foreach_instr_global(ctx, ins) {
      if (!(ins->compact_branch && ins->writeout))
         continue;

      if (ins->src[0] < ctx->temp_count)
         l->solutions[ins->src[0]] = 0;

      if (ins->src[2] < ctx->temp_count)
         l->solutions[ins->src[2]] = (16 * 1) + COMPONENT_X * 4;

      if (ins->src[3] < ctx->temp_count)
         l->solutions[ins->src[3]] = (16 * 1) + COMPONENT_Y * 4;

      if (ins->src[1] < ctx->temp_count)
         l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;

      if (ins->dest < ctx->temp_count)
         l->solutions[ins->dest] = (16 * 1) + COMPONENT_W * 4;
   }

   /* Destinations of instructions in a writeout block cannot be assigned
    * to r1 unless they are actually used as r1 from the writeout itself,
    * since the writes to r1 are special. A code sequence like:
    *
    *    sadd.fmov r1.x, [...]
    *    vadd.fadd r0, r1, r2
    *    [writeout branch]
    *
    * will misbehave since the r1.x write will be interpreted as a
    * gl_FragDepth write so it won't show up correctly when r1 is read in
    * the following segment. We model this as interference.
    */

   for (unsigned i = 0; i < 4; ++i)
      l->solutions[ctx->temp_count + i] = (16 * i);
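   /* Nodes temp_count..temp_count+3 stand in for r0-r3 (precoloured above),
    * so interference against node_r1 below keeps a value out of r1. */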
744*61046927SAndroid Build Coastguard Worker
745*61046927SAndroid Build Coastguard Worker mir_foreach_block(ctx, _blk) {
746*61046927SAndroid Build Coastguard Worker midgard_block *blk = (midgard_block *)_blk;
747*61046927SAndroid Build Coastguard Worker
748*61046927SAndroid Build Coastguard Worker mir_foreach_bundle_in_block(blk, v) {
749*61046927SAndroid Build Coastguard Worker /* We need at least a writeout and nonwriteout instruction */
750*61046927SAndroid Build Coastguard Worker if (v->instruction_count < 2)
751*61046927SAndroid Build Coastguard Worker continue;
752*61046927SAndroid Build Coastguard Worker
753*61046927SAndroid Build Coastguard Worker /* Branches always come at the end */
754*61046927SAndroid Build Coastguard Worker midgard_instruction *br = v->instructions[v->instruction_count - 1];
755*61046927SAndroid Build Coastguard Worker
756*61046927SAndroid Build Coastguard Worker if (!br->writeout)
757*61046927SAndroid Build Coastguard Worker continue;
758*61046927SAndroid Build Coastguard Worker
759*61046927SAndroid Build Coastguard Worker for (signed i = v->instruction_count - 2; i >= 0; --i) {
760*61046927SAndroid Build Coastguard Worker midgard_instruction *ins = v->instructions[i];
761*61046927SAndroid Build Coastguard Worker
762*61046927SAndroid Build Coastguard Worker if (ins->dest >= ctx->temp_count)
763*61046927SAndroid Build Coastguard Worker continue;
764*61046927SAndroid Build Coastguard Worker
765*61046927SAndroid Build Coastguard Worker bool used_as_r1 = (br->dest == ins->dest);
766*61046927SAndroid Build Coastguard Worker
767*61046927SAndroid Build Coastguard Worker mir_foreach_src(br, s)
768*61046927SAndroid Build Coastguard Worker used_as_r1 |= (s > 0) && (br->src[s] == ins->dest);
769*61046927SAndroid Build Coastguard Worker
770*61046927SAndroid Build Coastguard Worker if (!used_as_r1)
771*61046927SAndroid Build Coastguard Worker lcra_add_node_interference(l, ins->dest, mir_bytemask(ins),
772*61046927SAndroid Build Coastguard Worker node_r1, 0xFFFF);
773*61046927SAndroid Build Coastguard Worker }
774*61046927SAndroid Build Coastguard Worker }
775*61046927SAndroid Build Coastguard Worker }
776*61046927SAndroid Build Coastguard Worker
777*61046927SAndroid Build Coastguard Worker /* Precolour the blend input to r0. Writeout is necessarily at the end
778*61046927SAndroid Build Coastguard Worker * and blend shaders are single-RT only, so there is exactly one writeout
779*61046927SAndroid Build Coastguard Worker * block; this cannot conflict with the writeout's use of r0, so no
780*61046927SAndroid Build Coastguard Worker * intermediate move is needed. */
781*61046927SAndroid Build Coastguard Worker
782*61046927SAndroid Build Coastguard Worker if (ctx->blend_input != ~0) {
783*61046927SAndroid Build Coastguard Worker assert(ctx->blend_input < ctx->temp_count);
784*61046927SAndroid Build Coastguard Worker l->solutions[ctx->blend_input] = 0;
785*61046927SAndroid Build Coastguard Worker }
786*61046927SAndroid Build Coastguard Worker
787*61046927SAndroid Build Coastguard Worker /* Same for the dual-source blend input/output, except here we use r2,
788*61046927SAndroid Build Coastguard Worker * which is also set in the fragment shader. */
789*61046927SAndroid Build Coastguard Worker
790*61046927SAndroid Build Coastguard Worker if (ctx->blend_src1 != ~0) {
791*61046927SAndroid Build Coastguard Worker assert(ctx->blend_src1 < ctx->temp_count);
792*61046927SAndroid Build Coastguard Worker l->solutions[ctx->blend_src1] = (16 * 2);
793*61046927SAndroid Build Coastguard Worker ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
794*61046927SAndroid Build Coastguard Worker }
795*61046927SAndroid Build Coastguard Worker
796*61046927SAndroid Build Coastguard Worker mir_compute_interference(ctx, l);
797*61046927SAndroid Build Coastguard Worker
798*61046927SAndroid Build Coastguard Worker *spilled = !lcra_solve(l);
799*61046927SAndroid Build Coastguard Worker return l;
800*61046927SAndroid Build Coastguard Worker }
801*61046927SAndroid Build Coastguard Worker
802*61046927SAndroid Build Coastguard Worker /* Once registers have been decided via register allocation
803*61046927SAndroid Build Coastguard Worker * (allocate_registers), we need to rewrite the MIR to use registers instead of
804*61046927SAndroid Build Coastguard Worker * indices */
805*61046927SAndroid Build Coastguard Worker
806*61046927SAndroid Build Coastguard Worker static void
807*61046927SAndroid Build Coastguard Worker install_registers_instr(compiler_context *ctx, struct lcra_state *l,
808*61046927SAndroid Build Coastguard Worker midgard_instruction *ins)
809*61046927SAndroid Build Coastguard Worker {
810*61046927SAndroid Build Coastguard Worker unsigned src_shift[MIR_SRC_COUNT];
811*61046927SAndroid Build Coastguard Worker
812*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < MIR_SRC_COUNT; ++i) {
813*61046927SAndroid Build Coastguard Worker src_shift[i] =
814*61046927SAndroid Build Coastguard Worker util_logbase2(nir_alu_type_get_type_size(ins->src_types[i]) / 8);
815*61046927SAndroid Build Coastguard Worker }
816*61046927SAndroid Build Coastguard Worker
817*61046927SAndroid Build Coastguard Worker unsigned dest_shift =
818*61046927SAndroid Build Coastguard Worker util_logbase2(nir_alu_type_get_type_size(ins->dest_type) / 8);
819*61046927SAndroid Build Coastguard Worker
820*61046927SAndroid Build Coastguard Worker switch (ins->type) {
821*61046927SAndroid Build Coastguard Worker case TAG_ALU_4:
822*61046927SAndroid Build Coastguard Worker case TAG_ALU_8:
823*61046927SAndroid Build Coastguard Worker case TAG_ALU_12:
824*61046927SAndroid Build Coastguard Worker case TAG_ALU_16: {
825*61046927SAndroid Build Coastguard Worker if (ins->compact_branch)
826*61046927SAndroid Build Coastguard Worker return;
827*61046927SAndroid Build Coastguard Worker
828*61046927SAndroid Build Coastguard Worker struct phys_reg src1 = index_to_reg(ctx, l, ins->src[0], src_shift[0]);
829*61046927SAndroid Build Coastguard Worker struct phys_reg src2 = index_to_reg(ctx, l, ins->src[1], src_shift[1]);
830*61046927SAndroid Build Coastguard Worker struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift);
831*61046927SAndroid Build Coastguard Worker
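/* Shift the write mask up to the destination's byte offset, fold each
 * source's byte offset into its swizzle, then substitute the allocated
 * physical registers for the SSA indices. */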
832*61046927SAndroid Build Coastguard Worker mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
833*61046927SAndroid Build Coastguard Worker
834*61046927SAndroid Build Coastguard Worker unsigned dest_offset =
835*61046927SAndroid Build Coastguard Worker GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props) ? 0 : dest.offset;
836*61046927SAndroid Build Coastguard Worker
837*61046927SAndroid Build Coastguard Worker offset_swizzle(ins->swizzle[0], src1.offset, src1.shift, dest.shift,
838*61046927SAndroid Build Coastguard Worker dest_offset);
839*61046927SAndroid Build Coastguard Worker if (!ins->has_inline_constant)
840*61046927SAndroid Build Coastguard Worker offset_swizzle(ins->swizzle[1], src2.offset, src2.shift, dest.shift,
841*61046927SAndroid Build Coastguard Worker dest_offset);
842*61046927SAndroid Build Coastguard Worker if (ins->src[0] != ~0)
843*61046927SAndroid Build Coastguard Worker ins->src[0] = SSA_FIXED_REGISTER(src1.reg);
844*61046927SAndroid Build Coastguard Worker if (ins->src[1] != ~0)
845*61046927SAndroid Build Coastguard Worker ins->src[1] = SSA_FIXED_REGISTER(src2.reg);
846*61046927SAndroid Build Coastguard Worker if (ins->dest != ~0)
847*61046927SAndroid Build Coastguard Worker ins->dest = SSA_FIXED_REGISTER(dest.reg);
848*61046927SAndroid Build Coastguard Worker break;
849*61046927SAndroid Build Coastguard Worker }
850*61046927SAndroid Build Coastguard Worker
851*61046927SAndroid Build Coastguard Worker case TAG_LOAD_STORE_4: {
852*61046927SAndroid Build Coastguard Worker /* Which physical register we read off depends on
853*61046927SAndroid Build Coastguard Worker * whether we are loading or storing -- think about the
854*61046927SAndroid Build Coastguard Worker * logical dataflow */
855*61046927SAndroid Build Coastguard Worker
856*61046927SAndroid Build Coastguard Worker bool encodes_src = OP_IS_STORE(ins->op);
857*61046927SAndroid Build Coastguard Worker
858*61046927SAndroid Build Coastguard Worker if (encodes_src) {
859*61046927SAndroid Build Coastguard Worker struct phys_reg src = index_to_reg(ctx, l, ins->src[0], src_shift[0]);
860*61046927SAndroid Build Coastguard Worker assert(src.reg == 26 || src.reg == 27);
861*61046927SAndroid Build Coastguard Worker
862*61046927SAndroid Build Coastguard Worker ins->src[0] = SSA_FIXED_REGISTER(src.reg);
863*61046927SAndroid Build Coastguard Worker offset_swizzle(ins->swizzle[0], src.offset, src.shift, 0, 0);
864*61046927SAndroid Build Coastguard Worker } else {
865*61046927SAndroid Build Coastguard Worker struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift);
866*61046927SAndroid Build Coastguard Worker
867*61046927SAndroid Build Coastguard Worker ins->dest = SSA_FIXED_REGISTER(dst.reg);
868*61046927SAndroid Build Coastguard Worker offset_swizzle(ins->swizzle[0], 0, 2, dest_shift, dst.offset);
869*61046927SAndroid Build Coastguard Worker mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset);
870*61046927SAndroid Build Coastguard Worker }
871*61046927SAndroid Build Coastguard Worker
872*61046927SAndroid Build Coastguard Worker /* Then install the remaining source arguments */
873*61046927SAndroid Build Coastguard Worker
874*61046927SAndroid Build Coastguard Worker for (int i = 1; i <= 3; i++) {
875*61046927SAndroid Build Coastguard Worker unsigned src_index = ins->src[i];
876*61046927SAndroid Build Coastguard Worker if (src_index != ~0) {
877*61046927SAndroid Build Coastguard Worker struct phys_reg src = index_to_reg(ctx, l, src_index, src_shift[i]);
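/* src.offset is a byte offset; shifting down by log2(bytes per component)
 * converts it to a component index, which is folded into lane 0 of the
 * swizzle. */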
878*61046927SAndroid Build Coastguard Worker unsigned component = src.offset >> src.shift;
879*61046927SAndroid Build Coastguard Worker assert(component << src.shift == src.offset);
880*61046927SAndroid Build Coastguard Worker ins->src[i] = SSA_FIXED_REGISTER(src.reg);
881*61046927SAndroid Build Coastguard Worker ins->swizzle[i][0] += component;
882*61046927SAndroid Build Coastguard Worker }
883*61046927SAndroid Build Coastguard Worker }
884*61046927SAndroid Build Coastguard Worker
885*61046927SAndroid Build Coastguard Worker break;
886*61046927SAndroid Build Coastguard Worker }
887*61046927SAndroid Build Coastguard Worker
888*61046927SAndroid Build Coastguard Worker case TAG_TEXTURE_4: {
889*61046927SAndroid Build Coastguard Worker if (ins->op == midgard_tex_op_barrier)
890*61046927SAndroid Build Coastguard Worker break;
891*61046927SAndroid Build Coastguard Worker
892*61046927SAndroid Build Coastguard Worker /* Grab RA results */
893*61046927SAndroid Build Coastguard Worker struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift);
894*61046927SAndroid Build Coastguard Worker struct phys_reg coord = index_to_reg(ctx, l, ins->src[1], src_shift[1]);
895*61046927SAndroid Build Coastguard Worker struct phys_reg lod = index_to_reg(ctx, l, ins->src[2], src_shift[2]);
896*61046927SAndroid Build Coastguard Worker struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_shift[3]);
897*61046927SAndroid Build Coastguard Worker
898*61046927SAndroid Build Coastguard Worker /* First, install the texture coordinate */
899*61046927SAndroid Build Coastguard Worker if (ins->src[1] != ~0)
900*61046927SAndroid Build Coastguard Worker ins->src[1] = SSA_FIXED_REGISTER(coord.reg);
901*61046927SAndroid Build Coastguard Worker offset_swizzle(ins->swizzle[1], coord.offset, coord.shift, dest.shift, 0);
902*61046927SAndroid Build Coastguard Worker
903*61046927SAndroid Build Coastguard Worker /* Next, install the destination */
904*61046927SAndroid Build Coastguard Worker if (ins->dest != ~0)
905*61046927SAndroid Build Coastguard Worker ins->dest = SSA_FIXED_REGISTER(dest.reg);
906*61046927SAndroid Build Coastguard Worker offset_swizzle(ins->swizzle[0], 0, 2, dest.shift,
907*61046927SAndroid Build Coastguard Worker dest_shift == 1 ? dest.offset % 8 : dest.offset);
908*61046927SAndroid Build Coastguard Worker mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
909*61046927SAndroid Build Coastguard Worker
910*61046927SAndroid Build Coastguard Worker /* If there is a register LOD/bias, use it */
911*61046927SAndroid Build Coastguard Worker if (ins->src[2] != ~0) {
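/* The LOD/bias operand is scalar; its byte offset must be 32-bit aligned
 * and is converted to a component index for the swizzle. */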
912*61046927SAndroid Build Coastguard Worker assert(!(lod.offset & 3));
913*61046927SAndroid Build Coastguard Worker ins->src[2] = SSA_FIXED_REGISTER(lod.reg);
914*61046927SAndroid Build Coastguard Worker ins->swizzle[2][0] = lod.offset / 4;
915*61046927SAndroid Build Coastguard Worker }
916*61046927SAndroid Build Coastguard Worker
917*61046927SAndroid Build Coastguard Worker /* If there is an offset register, install it */
918*61046927SAndroid Build Coastguard Worker if (ins->src[3] != ~0) {
919*61046927SAndroid Build Coastguard Worker ins->src[3] = SSA_FIXED_REGISTER(offset.reg);
920*61046927SAndroid Build Coastguard Worker ins->swizzle[3][0] = offset.offset / 4;
921*61046927SAndroid Build Coastguard Worker }
922*61046927SAndroid Build Coastguard Worker
923*61046927SAndroid Build Coastguard Worker break;
924*61046927SAndroid Build Coastguard Worker }
925*61046927SAndroid Build Coastguard Worker
926*61046927SAndroid Build Coastguard Worker default:
927*61046927SAndroid Build Coastguard Worker break;
928*61046927SAndroid Build Coastguard Worker }
929*61046927SAndroid Build Coastguard Worker }
930*61046927SAndroid Build Coastguard Worker
931*61046927SAndroid Build Coastguard Worker static void
932*61046927SAndroid Build Coastguard Worker install_registers(compiler_context *ctx, struct lcra_state *l)
933*61046927SAndroid Build Coastguard Worker {
934*61046927SAndroid Build Coastguard Worker mir_foreach_instr_global(ctx, ins)
935*61046927SAndroid Build Coastguard Worker install_registers_instr(ctx, l, ins);
936*61046927SAndroid Build Coastguard Worker }
937*61046927SAndroid Build Coastguard Worker
938*61046927SAndroid Build Coastguard Worker /* If register allocation fails, find the best spill node */
939*61046927SAndroid Build Coastguard Worker
940*61046927SAndroid Build Coastguard Worker static signed
941*61046927SAndroid Build Coastguard Worker mir_choose_spill_node(compiler_context *ctx, struct lcra_state *l)
942*61046927SAndroid Build Coastguard Worker {
943*61046927SAndroid Build Coastguard Worker /* We can't spill a previously spilled value or an unspill */
944*61046927SAndroid Build Coastguard Worker
945*61046927SAndroid Build Coastguard Worker mir_foreach_instr_global(ctx, ins) {
946*61046927SAndroid Build Coastguard Worker if (ins->no_spill & (1 << l->spill_class)) {
947*61046927SAndroid Build Coastguard Worker lcra_set_node_spill_cost(l, ins->dest, -1);
948*61046927SAndroid Build Coastguard Worker
949*61046927SAndroid Build Coastguard Worker if (l->spill_class != REG_CLASS_WORK) {
950*61046927SAndroid Build Coastguard Worker mir_foreach_src(ins, s)
951*61046927SAndroid Build Coastguard Worker lcra_set_node_spill_cost(l, ins->src[s], -1);
952*61046927SAndroid Build Coastguard Worker }
953*61046927SAndroid Build Coastguard Worker }
954*61046927SAndroid Build Coastguard Worker }
955*61046927SAndroid Build Coastguard Worker
956*61046927SAndroid Build Coastguard Worker return lcra_get_best_spill_node(l);
957*61046927SAndroid Build Coastguard Worker }
958*61046927SAndroid Build Coastguard Worker
959*61046927SAndroid Build Coastguard Worker /* Once we've chosen a spill node, spill it */
960*61046927SAndroid Build Coastguard Worker
961*61046927SAndroid Build Coastguard Worker static void
962*61046927SAndroid Build Coastguard Worker mir_spill_register(compiler_context *ctx, unsigned spill_node,
963*61046927SAndroid Build Coastguard Worker unsigned spill_class, unsigned *spill_count)
964*61046927SAndroid Build Coastguard Worker {
965*61046927SAndroid Build Coastguard Worker if (spill_class == REG_CLASS_WORK && ctx->inputs->is_blend)
966*61046927SAndroid Build Coastguard Worker unreachable("Blend shader spilling is currently unimplemented");
967*61046927SAndroid Build Coastguard Worker
968*61046927SAndroid Build Coastguard Worker unsigned spill_index = ctx->temp_count;
969*61046927SAndroid Build Coastguard Worker
970*61046927SAndroid Build Coastguard Worker /* We have a spill node, so check the class. Work registers
971*61046927SAndroid Build Coastguard Worker * legitimately spill to TLS, but special registers just spill to work
972*61046927SAndroid Build Coastguard Worker * registers */
973*61046927SAndroid Build Coastguard Worker
974*61046927SAndroid Build Coastguard Worker bool is_special = spill_class != REG_CLASS_WORK;
975*61046927SAndroid Build Coastguard Worker bool is_special_w = spill_class == REG_CLASS_TEXW;
976*61046927SAndroid Build Coastguard Worker
977*61046927SAndroid Build Coastguard Worker /* Allocate TLS slot (maybe) */
978*61046927SAndroid Build Coastguard Worker unsigned spill_slot = !is_special ? (*spill_count)++ : 0;
979*61046927SAndroid Build Coastguard Worker
980*61046927SAndroid Build Coastguard Worker /* For special reads, figure out how many bytes we need */
981*61046927SAndroid Build Coastguard Worker unsigned read_bytemask = 0;
982*61046927SAndroid Build Coastguard Worker
983*61046927SAndroid Build Coastguard Worker /* If multiple instructions write to this destination, we'll have to
984*61046927SAndroid Build Coastguard Worker * fill from TLS before writing */
985*61046927SAndroid Build Coastguard Worker unsigned write_count = 0;
986*61046927SAndroid Build Coastguard Worker
987*61046927SAndroid Build Coastguard Worker mir_foreach_instr_global_safe(ctx, ins) {
988*61046927SAndroid Build Coastguard Worker read_bytemask |= mir_bytemask_of_read_components(ins, spill_node);
989*61046927SAndroid Build Coastguard Worker if (ins->dest == spill_node)
990*61046927SAndroid Build Coastguard Worker ++write_count;
991*61046927SAndroid Build Coastguard Worker }
992*61046927SAndroid Build Coastguard Worker
993*61046927SAndroid Build Coastguard Worker /* For TLS, replace all stores to the spilled node. For
994*61046927SAndroid Build Coastguard Worker * special reads, just keep as-is; the class will be demoted
995*61046927SAndroid Build Coastguard Worker * implicitly. For special writes, spill to a work register */
996*61046927SAndroid Build Coastguard Worker
997*61046927SAndroid Build Coastguard Worker if (!is_special || is_special_w) {
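/* For a special write, the "spill slot" is just a fresh temporary that
 * will land in a work register rather than a TLS slot. */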
998*61046927SAndroid Build Coastguard Worker if (is_special_w)
999*61046927SAndroid Build Coastguard Worker spill_slot = spill_index++;
1000*61046927SAndroid Build Coastguard Worker
1001*61046927SAndroid Build Coastguard Worker unsigned last_id = ~0;
1002*61046927SAndroid Build Coastguard Worker unsigned last_fill = ~0;
1003*61046927SAndroid Build Coastguard Worker unsigned last_spill_index = ~0;
1004*61046927SAndroid Build Coastguard Worker midgard_instruction *last_spill = NULL;
1005*61046927SAndroid Build Coastguard Worker
1006*61046927SAndroid Build Coastguard Worker mir_foreach_block(ctx, _block) {
1007*61046927SAndroid Build Coastguard Worker midgard_block *block = (midgard_block *)_block;
1008*61046927SAndroid Build Coastguard Worker mir_foreach_instr_in_block_safe(block, ins) {
1009*61046927SAndroid Build Coastguard Worker if (ins->dest != spill_node)
1010*61046927SAndroid Build Coastguard Worker continue;
1011*61046927SAndroid Build Coastguard Worker
1012*61046927SAndroid Build Coastguard Worker /* Note: it's important to match the mask of the spill
1013*61046927SAndroid Build Coastguard Worker * with the mask of the instruction whose destination
1014*61046927SAndroid Build Coastguard Worker * we're spilling; otherwise we'll read invalid
1015*61046927SAndroid Build Coastguard Worker * components and can fail RA in a subsequent iteration
1016*61046927SAndroid Build Coastguard Worker */
1017*61046927SAndroid Build Coastguard Worker
1018*61046927SAndroid Build Coastguard Worker if (is_special_w) {
1019*61046927SAndroid Build Coastguard Worker midgard_instruction st = v_mov(spill_node, spill_slot);
1020*61046927SAndroid Build Coastguard Worker st.no_spill |= (1 << spill_class);
1021*61046927SAndroid Build Coastguard Worker st.mask = ins->mask;
1022*61046927SAndroid Build Coastguard Worker st.dest_type = st.src_types[1] = ins->dest_type;
1023*61046927SAndroid Build Coastguard Worker
1024*61046927SAndroid Build Coastguard Worker /* Hint: don't rewrite this node */
1025*61046927SAndroid Build Coastguard Worker st.hint = true;
1026*61046927SAndroid Build Coastguard Worker
1027*61046927SAndroid Build Coastguard Worker mir_insert_instruction_after_scheduled(ctx, block, ins, st);
1028*61046927SAndroid Build Coastguard Worker } else {
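/* Writes from the same bundle reuse the previous spill destination so the
 * bundle needs only one scratch store; later writes just widen that
 * store's mask (see below). */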
1029*61046927SAndroid Build Coastguard Worker unsigned bundle = ins->bundle_id;
1030*61046927SAndroid Build Coastguard Worker unsigned dest =
1031*61046927SAndroid Build Coastguard Worker (bundle == last_id) ? last_spill_index : spill_index++;
1032*61046927SAndroid Build Coastguard Worker
1033*61046927SAndroid Build Coastguard Worker unsigned bytemask = mir_bytemask(ins);
1034*61046927SAndroid Build Coastguard Worker unsigned write_mask =
1035*61046927SAndroid Build Coastguard Worker mir_from_bytemask(mir_round_bytemask_up(bytemask, 32), 32);
1036*61046927SAndroid Build Coastguard Worker
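/* For a partial write of a multiply-written node, fill the current
 * contents from TLS first so the store below writes back a complete
 * vec4 (at most one fill per bundle). */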
1037*61046927SAndroid Build Coastguard Worker if (write_count > 1 && bytemask != 0xFFFF &&
1038*61046927SAndroid Build Coastguard Worker bundle != last_fill) {
1039*61046927SAndroid Build Coastguard Worker midgard_instruction read =
1040*61046927SAndroid Build Coastguard Worker v_load_store_scratch(dest, spill_slot, false, 0xF);
1041*61046927SAndroid Build Coastguard Worker mir_insert_instruction_before_scheduled(ctx, block, ins,
1042*61046927SAndroid Build Coastguard Worker read);
1043*61046927SAndroid Build Coastguard Worker write_mask = 0xF;
1044*61046927SAndroid Build Coastguard Worker last_fill = bundle;
1045*61046927SAndroid Build Coastguard Worker }
1046*61046927SAndroid Build Coastguard Worker
1047*61046927SAndroid Build Coastguard Worker ins->dest = dest;
1048*61046927SAndroid Build Coastguard Worker ins->no_spill |= (1 << spill_class);
1049*61046927SAndroid Build Coastguard Worker
1050*61046927SAndroid Build Coastguard Worker bool move = false;
1051*61046927SAndroid Build Coastguard Worker
1052*61046927SAndroid Build Coastguard Worker /* In the same bundle, reads of the destination
1053*61046927SAndroid Build Coastguard Worker * of the spilt instruction need to be direct */
1054*61046927SAndroid Build Coastguard Worker midgard_instruction *it = ins;
1055*61046927SAndroid Build Coastguard Worker while ((it = list_first_entry(&it->link, midgard_instruction,
1056*61046927SAndroid Build Coastguard Worker link)) &&
1057*61046927SAndroid Build Coastguard Worker (it->bundle_id == bundle)) {
1058*61046927SAndroid Build Coastguard Worker
1059*61046927SAndroid Build Coastguard Worker if (!mir_has_arg(it, spill_node))
1060*61046927SAndroid Build Coastguard Worker continue;
1061*61046927SAndroid Build Coastguard Worker
1062*61046927SAndroid Build Coastguard Worker mir_rewrite_index_src_single(it, spill_node, dest);
1063*61046927SAndroid Build Coastguard Worker
1064*61046927SAndroid Build Coastguard Worker /* The spilt instruction will write to
1065*61046927SAndroid Build Coastguard Worker * a work register for `it` to read but
1066*61046927SAndroid Build Coastguard Worker * the spill needs an LD/ST register */
1067*61046927SAndroid Build Coastguard Worker move = true;
1068*61046927SAndroid Build Coastguard Worker }
1069*61046927SAndroid Build Coastguard Worker
1070*61046927SAndroid Build Coastguard Worker if (move)
1071*61046927SAndroid Build Coastguard Worker dest = spill_index++;
1072*61046927SAndroid Build Coastguard Worker
1073*61046927SAndroid Build Coastguard Worker if (last_id == bundle) {
1074*61046927SAndroid Build Coastguard Worker last_spill->mask |= write_mask;
1075*61046927SAndroid Build Coastguard Worker u_foreach_bit(c, write_mask)
1076*61046927SAndroid Build Coastguard Worker last_spill->swizzle[0][c] = c;
1077*61046927SAndroid Build Coastguard Worker } else {
1078*61046927SAndroid Build Coastguard Worker midgard_instruction st =
1079*61046927SAndroid Build Coastguard Worker v_load_store_scratch(dest, spill_slot, true, write_mask);
1080*61046927SAndroid Build Coastguard Worker last_spill = mir_insert_instruction_after_scheduled(
1081*61046927SAndroid Build Coastguard Worker ctx, block, ins, st);
1082*61046927SAndroid Build Coastguard Worker }
1083*61046927SAndroid Build Coastguard Worker
1084*61046927SAndroid Build Coastguard Worker if (move) {
1085*61046927SAndroid Build Coastguard Worker midgard_instruction mv = v_mov(ins->dest, dest);
1086*61046927SAndroid Build Coastguard Worker mv.no_spill |= (1 << spill_class);
1087*61046927SAndroid Build Coastguard Worker
1088*61046927SAndroid Build Coastguard Worker mir_insert_instruction_after_scheduled(ctx, block, ins, mv);
1089*61046927SAndroid Build Coastguard Worker }
1090*61046927SAndroid Build Coastguard Worker
1091*61046927SAndroid Build Coastguard Worker last_id = bundle;
1092*61046927SAndroid Build Coastguard Worker last_spill_index = ins->dest;
1093*61046927SAndroid Build Coastguard Worker }
1094*61046927SAndroid Build Coastguard Worker
1095*61046927SAndroid Build Coastguard Worker if (!is_special)
1096*61046927SAndroid Build Coastguard Worker ctx->spills++;
1097*61046927SAndroid Build Coastguard Worker }
1098*61046927SAndroid Build Coastguard Worker }
1099*61046927SAndroid Build Coastguard Worker }
1100*61046927SAndroid Build Coastguard Worker
1101*61046927SAndroid Build Coastguard Worker /* Insert a load from TLS before the first consecutive
1102*61046927SAndroid Build Coastguard Worker * use of the node, rewriting to use spilled indices to
1103*61046927SAndroid Build Coastguard Worker * break up the live range. Or, for special, insert a
1104*61046927SAndroid Build Coastguard Worker * move. Ironically the latter *increases* register
1105*61046927SAndroid Build Coastguard Worker * pressure, but the two uses of the spilling mechanism
1106*61046927SAndroid Build Coastguard Worker * are somewhat orthogonal. (special spilling is to use
1107*61046927SAndroid Build Coastguard Worker * work registers to back special registers; TLS
1108*61046927SAndroid Build Coastguard Worker * spilling is to use memory to back work registers) */
1109*61046927SAndroid Build Coastguard Worker
1110*61046927SAndroid Build Coastguard Worker mir_foreach_block(ctx, _block) {
1111*61046927SAndroid Build Coastguard Worker midgard_block *block = (midgard_block *)_block;
1112*61046927SAndroid Build Coastguard Worker mir_foreach_instr_in_block(block, ins) {
1113*61046927SAndroid Build Coastguard Worker /* We can't rewrite the moves used to spill in the
1114*61046927SAndroid Build Coastguard Worker * first place. These moves are hinted. */
1115*61046927SAndroid Build Coastguard Worker if (ins->hint)
1116*61046927SAndroid Build Coastguard Worker continue;
1117*61046927SAndroid Build Coastguard Worker
1118*61046927SAndroid Build Coastguard Worker /* If we don't use the spilled value, nothing to do */
1119*61046927SAndroid Build Coastguard Worker if (!mir_has_arg(ins, spill_node))
1120*61046927SAndroid Build Coastguard Worker continue;
1121*61046927SAndroid Build Coastguard Worker
1122*61046927SAndroid Build Coastguard Worker unsigned index = 0;
1123*61046927SAndroid Build Coastguard Worker
1124*61046927SAndroid Build Coastguard Worker if (!is_special_w) {
1125*61046927SAndroid Build Coastguard Worker index = ++spill_index;
1126*61046927SAndroid Build Coastguard Worker
1127*61046927SAndroid Build Coastguard Worker midgard_instruction *before = ins;
1128*61046927SAndroid Build Coastguard Worker midgard_instruction st;
1129*61046927SAndroid Build Coastguard Worker
1130*61046927SAndroid Build Coastguard Worker if (is_special) {
1131*61046927SAndroid Build Coastguard Worker /* Move */
1132*61046927SAndroid Build Coastguard Worker st = v_mov(spill_node, index);
1133*61046927SAndroid Build Coastguard Worker st.no_spill |= (1 << spill_class);
1134*61046927SAndroid Build Coastguard Worker } else {
1135*61046927SAndroid Build Coastguard Worker /* TLS load */
1136*61046927SAndroid Build Coastguard Worker st = v_load_store_scratch(index, spill_slot, false, 0xF);
1137*61046927SAndroid Build Coastguard Worker }
1138*61046927SAndroid Build Coastguard Worker
1139*61046927SAndroid Build Coastguard Worker /* Mask the load based on the component count
1140*61046927SAndroid Build Coastguard Worker * actually needed to prevent RA loops */
1141*61046927SAndroid Build Coastguard Worker
1142*61046927SAndroid Build Coastguard Worker st.mask =
1143*61046927SAndroid Build Coastguard Worker mir_from_bytemask(mir_round_bytemask_up(read_bytemask, 32), 32);
1144*61046927SAndroid Build Coastguard Worker
1145*61046927SAndroid Build Coastguard Worker mir_insert_instruction_before_scheduled(ctx, block, before, st);
1146*61046927SAndroid Build Coastguard Worker } else {
1147*61046927SAndroid Build Coastguard Worker /* Special writes already have their move spilled in */
1148*61046927SAndroid Build Coastguard Worker index = spill_slot;
1149*61046927SAndroid Build Coastguard Worker }
1150*61046927SAndroid Build Coastguard Worker
1151*61046927SAndroid Build Coastguard Worker /* Rewrite to use */
1152*61046927SAndroid Build Coastguard Worker mir_rewrite_index_src_single(ins, spill_node, index);
1153*61046927SAndroid Build Coastguard Worker
1154*61046927SAndroid Build Coastguard Worker if (!is_special)
1155*61046927SAndroid Build Coastguard Worker ctx->fills++;
1156*61046927SAndroid Build Coastguard Worker }
1157*61046927SAndroid Build Coastguard Worker }
1158*61046927SAndroid Build Coastguard Worker
1159*61046927SAndroid Build Coastguard Worker /* Reset hints */
1160*61046927SAndroid Build Coastguard Worker
1161*61046927SAndroid Build Coastguard Worker mir_foreach_instr_global(ctx, ins) {
1162*61046927SAndroid Build Coastguard Worker ins->hint = false;
1163*61046927SAndroid Build Coastguard Worker }
1164*61046927SAndroid Build Coastguard Worker }
1165*61046927SAndroid Build Coastguard Worker
1166*61046927SAndroid Build Coastguard Worker static void
1167*61046927SAndroid Build Coastguard Worker mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
1168*61046927SAndroid Build Coastguard Worker {
1169*61046927SAndroid Build Coastguard Worker unsigned uniforms = ctx->info->push.count / 4;
1170*61046927SAndroid Build Coastguard Worker unsigned old_work_count = 16 - MAX2(uniforms - 8, 0);
1171*61046927SAndroid Build Coastguard Worker unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
1172*61046927SAndroid Build Coastguard Worker
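/* Push uniform vec4 i occupies register (23 - i), so uniforms past the
 * first 8 eat into the work-register file from r15 downwards. Sources in
 * [min_demote, max_demote) sit in registers being reclaimed for work use
 * and must be turned back into explicit UBO loads below. */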
1173*61046927SAndroid Build Coastguard Worker unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
1174*61046927SAndroid Build Coastguard Worker unsigned max_demote = SSA_FIXED_REGISTER(work_count);
1175*61046927SAndroid Build Coastguard Worker
1176*61046927SAndroid Build Coastguard Worker mir_foreach_block(ctx, _block) {
1177*61046927SAndroid Build Coastguard Worker midgard_block *block = (midgard_block *)_block;
1178*61046927SAndroid Build Coastguard Worker mir_foreach_instr_in_block(block, ins) {
1179*61046927SAndroid Build Coastguard Worker mir_foreach_src(ins, i) {
1180*61046927SAndroid Build Coastguard Worker if (ins->src[i] < min_demote || ins->src[i] >= max_demote)
1181*61046927SAndroid Build Coastguard Worker continue;
1182*61046927SAndroid Build Coastguard Worker
1183*61046927SAndroid Build Coastguard Worker midgard_instruction *before = ins;
1184*61046927SAndroid Build Coastguard Worker
1185*61046927SAndroid Build Coastguard Worker unsigned temp = make_compiler_temp(ctx);
1186*61046927SAndroid Build Coastguard Worker unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
1187*61046927SAndroid Build Coastguard Worker assert(idx < ctx->info->push.count);
1188*61046927SAndroid Build Coastguard Worker
1189*61046927SAndroid Build Coastguard Worker ctx->ubo_mask |= BITSET_BIT(ctx->info->push.words[idx].ubo);
1190*61046927SAndroid Build Coastguard Worker
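/* Emit a 128-bit UBO load of the pushed word's source vector into a fresh
 * temporary, then rewrite the use to read that temporary instead of the
 * uniform register. */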
1191*61046927SAndroid Build Coastguard Worker midgard_instruction ld = {
1192*61046927SAndroid Build Coastguard Worker .type = TAG_LOAD_STORE_4,
1193*61046927SAndroid Build Coastguard Worker .mask = 0xF,
1194*61046927SAndroid Build Coastguard Worker .dest = temp,
1195*61046927SAndroid Build Coastguard Worker .dest_type = ins->src_types[i],
1196*61046927SAndroid Build Coastguard Worker .src = {~0, ~0, ~0, ~0},
1197*61046927SAndroid Build Coastguard Worker .swizzle = SWIZZLE_IDENTITY_4,
1198*61046927SAndroid Build Coastguard Worker .op = midgard_op_ld_ubo_128,
1199*61046927SAndroid Build Coastguard Worker .load_store =
1200*61046927SAndroid Build Coastguard Worker {
1201*61046927SAndroid Build Coastguard Worker .index_reg = REGISTER_LDST_ZERO,
1202*61046927SAndroid Build Coastguard Worker },
1203*61046927SAndroid Build Coastguard Worker .constants.u32[0] = ctx->info->push.words[idx].offset,
1204*61046927SAndroid Build Coastguard Worker };
1205*61046927SAndroid Build Coastguard Worker
1206*61046927SAndroid Build Coastguard Worker midgard_pack_ubo_index_imm(&ld.load_store,
1207*61046927SAndroid Build Coastguard Worker ctx->info->push.words[idx].ubo);
1208*61046927SAndroid Build Coastguard Worker
1209*61046927SAndroid Build Coastguard Worker mir_insert_instruction_before_scheduled(ctx, block, before, ld);
1210*61046927SAndroid Build Coastguard Worker
1211*61046927SAndroid Build Coastguard Worker mir_rewrite_index_src_single(ins, ins->src[i], temp);
1212*61046927SAndroid Build Coastguard Worker }
1213*61046927SAndroid Build Coastguard Worker }
1214*61046927SAndroid Build Coastguard Worker }
1215*61046927SAndroid Build Coastguard Worker
1216*61046927SAndroid Build Coastguard Worker ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4);
1217*61046927SAndroid Build Coastguard Worker }
1218*61046927SAndroid Build Coastguard Worker
1219*61046927SAndroid Build Coastguard Worker /* Run register allocation in a loop, spilling until we succeed */
1220*61046927SAndroid Build Coastguard Worker
1221*61046927SAndroid Build Coastguard Worker void
1222*61046927SAndroid Build Coastguard Worker mir_ra(compiler_context *ctx)
1223*61046927SAndroid Build Coastguard Worker {
1224*61046927SAndroid Build Coastguard Worker struct lcra_state *l = NULL;
1225*61046927SAndroid Build Coastguard Worker bool spilled = false;
1226*61046927SAndroid Build Coastguard Worker int iter_count = 1000; /* max iterations */
1227*61046927SAndroid Build Coastguard Worker
1228*61046927SAndroid Build Coastguard Worker /* Number of 128-bit slots in memory we've spilled into */
1229*61046927SAndroid Build Coastguard Worker unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
1230*61046927SAndroid Build Coastguard Worker
1231*61046927SAndroid Build Coastguard Worker mir_create_pipeline_registers(ctx);
1232*61046927SAndroid Build Coastguard Worker
1233*61046927SAndroid Build Coastguard Worker do {
1234*61046927SAndroid Build Coastguard Worker if (spilled) {
1235*61046927SAndroid Build Coastguard Worker signed spill_node = mir_choose_spill_node(ctx, l);
1236*61046927SAndroid Build Coastguard Worker unsigned uniforms = ctx->info->push.count / 4;
1237*61046927SAndroid Build Coastguard Worker
1238*61046927SAndroid Build Coastguard Worker /* It's a lot cheaper to demote uniforms to get more
1239*61046927SAndroid Build Coastguard Worker * work registers than to spill to TLS. */
1240*61046927SAndroid Build Coastguard Worker if (l->spill_class == REG_CLASS_WORK && uniforms > 8) {
1241*61046927SAndroid Build Coastguard Worker mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8));
1242*61046927SAndroid Build Coastguard Worker } else if (spill_node == -1) {
1243*61046927SAndroid Build Coastguard Worker fprintf(stderr, "ERROR: Failed to choose spill node\n");
1244*61046927SAndroid Build Coastguard Worker lcra_free(l);
1245*61046927SAndroid Build Coastguard Worker return;
1246*61046927SAndroid Build Coastguard Worker } else {
1247*61046927SAndroid Build Coastguard Worker mir_spill_register(ctx, spill_node, l->spill_class, &spill_count);
1248*61046927SAndroid Build Coastguard Worker }
1249*61046927SAndroid Build Coastguard Worker }
1250*61046927SAndroid Build Coastguard Worker
1251*61046927SAndroid Build Coastguard Worker mir_squeeze_index(ctx);
1252*61046927SAndroid Build Coastguard Worker mir_invalidate_liveness(ctx);
1253*61046927SAndroid Build Coastguard Worker
1254*61046927SAndroid Build Coastguard Worker if (l) {
1255*61046927SAndroid Build Coastguard Worker lcra_free(l);
1256*61046927SAndroid Build Coastguard Worker l = NULL;
1257*61046927SAndroid Build Coastguard Worker }
1258*61046927SAndroid Build Coastguard Worker
1259*61046927SAndroid Build Coastguard Worker l = allocate_registers(ctx, &spilled);
1260*61046927SAndroid Build Coastguard Worker } while (spilled && ((iter_count--) > 0));
1261*61046927SAndroid Build Coastguard Worker
1262*61046927SAndroid Build Coastguard Worker if (iter_count <= 0) {
1263*61046927SAndroid Build Coastguard Worker fprintf(
1264*61046927SAndroid Build Coastguard Worker stderr,
1265*61046927SAndroid Build Coastguard Worker "panfrost: Gave up allocating registers, rendering will be incomplete\n");
1266*61046927SAndroid Build Coastguard Worker assert(0);
1267*61046927SAndroid Build Coastguard Worker }
1268*61046927SAndroid Build Coastguard Worker
1269*61046927SAndroid Build Coastguard Worker /* Report spilling information. spill_count is in 128-bit slots (vec4 x
1270*61046927SAndroid Build Coastguard Worker * fp32), but tls_size is in bytes, so multiply by 16 */
1271*61046927SAndroid Build Coastguard Worker
1272*61046927SAndroid Build Coastguard Worker ctx->info->tls_size = spill_count * 16;
1273*61046927SAndroid Build Coastguard Worker
1274*61046927SAndroid Build Coastguard Worker install_registers(ctx, l);
1275*61046927SAndroid Build Coastguard Worker
1276*61046927SAndroid Build Coastguard Worker lcra_free(l);
1277*61046927SAndroid Build Coastguard Worker }
1278