xref: /aosp_15_r20/external/mesa3d/src/panfrost/midgard/midgard_ra.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
3*61046927SAndroid Build Coastguard Worker  * Copyright (C) 2019 Collabora, Ltd.
4*61046927SAndroid Build Coastguard Worker  *
5*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
6*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
7*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
8*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
10*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
11*61046927SAndroid Build Coastguard Worker  *
12*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
13*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
14*61046927SAndroid Build Coastguard Worker  * Software.
15*61046927SAndroid Build Coastguard Worker  *
16*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*61046927SAndroid Build Coastguard Worker  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*61046927SAndroid Build Coastguard Worker  * SOFTWARE.
23*61046927SAndroid Build Coastguard Worker  */
24*61046927SAndroid Build Coastguard Worker 
25*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
26*61046927SAndroid Build Coastguard Worker #include "util/u_memory.h"
27*61046927SAndroid Build Coastguard Worker #include "compiler.h"
28*61046927SAndroid Build Coastguard Worker #include "midgard_ops.h"
29*61046927SAndroid Build Coastguard Worker #include "midgard_quirks.h"
30*61046927SAndroid Build Coastguard Worker 
31*61046927SAndroid Build Coastguard Worker struct phys_reg {
32*61046927SAndroid Build Coastguard Worker    /* Physical register: 0-31 */
33*61046927SAndroid Build Coastguard Worker    unsigned reg;
34*61046927SAndroid Build Coastguard Worker 
35*61046927SAndroid Build Coastguard Worker    /* Byte offset into the physical register: 0-15 */
36*61046927SAndroid Build Coastguard Worker    unsigned offset;
37*61046927SAndroid Build Coastguard Worker 
38*61046927SAndroid Build Coastguard Worker    /* log2(bytes per component) for fast mul/div */
39*61046927SAndroid Build Coastguard Worker    unsigned shift;
40*61046927SAndroid Build Coastguard Worker };
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker /* Shift up by reg_offset and horizontally by dst_offset. */
43*61046927SAndroid Build Coastguard Worker 
44*61046927SAndroid Build Coastguard Worker static void
offset_swizzle(unsigned * swizzle,unsigned reg_offset,unsigned srcshift,unsigned dstshift,unsigned dst_offset)45*61046927SAndroid Build Coastguard Worker offset_swizzle(unsigned *swizzle, unsigned reg_offset, unsigned srcshift,
46*61046927SAndroid Build Coastguard Worker                unsigned dstshift, unsigned dst_offset)
47*61046927SAndroid Build Coastguard Worker {
48*61046927SAndroid Build Coastguard Worker    unsigned out[MIR_VEC_COMPONENTS];
49*61046927SAndroid Build Coastguard Worker 
50*61046927SAndroid Build Coastguard Worker    signed reg_comp = reg_offset >> srcshift;
51*61046927SAndroid Build Coastguard Worker    signed dst_comp = dst_offset >> dstshift;
52*61046927SAndroid Build Coastguard Worker 
53*61046927SAndroid Build Coastguard Worker    unsigned max_component = (16 >> srcshift) - 1;
54*61046927SAndroid Build Coastguard Worker 
55*61046927SAndroid Build Coastguard Worker    assert(reg_comp << srcshift == reg_offset);
56*61046927SAndroid Build Coastguard Worker    assert(dst_comp << dstshift == dst_offset);
57*61046927SAndroid Build Coastguard Worker 
58*61046927SAndroid Build Coastguard Worker    for (signed c = 0; c < MIR_VEC_COMPONENTS; ++c) {
59*61046927SAndroid Build Coastguard Worker       signed comp = MAX2(c - dst_comp, 0);
60*61046927SAndroid Build Coastguard Worker       out[c] = MIN2(swizzle[comp] + reg_comp, max_component);
61*61046927SAndroid Build Coastguard Worker    }
62*61046927SAndroid Build Coastguard Worker 
63*61046927SAndroid Build Coastguard Worker    memcpy(swizzle, out, sizeof(out));
64*61046927SAndroid Build Coastguard Worker }
65*61046927SAndroid Build Coastguard Worker 
66*61046927SAndroid Build Coastguard Worker /* Helper to return the default phys_reg for a given register */
67*61046927SAndroid Build Coastguard Worker 
68*61046927SAndroid Build Coastguard Worker static struct phys_reg
default_phys_reg(int reg,unsigned shift)69*61046927SAndroid Build Coastguard Worker default_phys_reg(int reg, unsigned shift)
70*61046927SAndroid Build Coastguard Worker {
71*61046927SAndroid Build Coastguard Worker    struct phys_reg r = {
72*61046927SAndroid Build Coastguard Worker       .reg = reg,
73*61046927SAndroid Build Coastguard Worker       .offset = 0,
74*61046927SAndroid Build Coastguard Worker       .shift = shift,
75*61046927SAndroid Build Coastguard Worker    };
76*61046927SAndroid Build Coastguard Worker 
77*61046927SAndroid Build Coastguard Worker    return r;
78*61046927SAndroid Build Coastguard Worker }
79*61046927SAndroid Build Coastguard Worker 
80*61046927SAndroid Build Coastguard Worker /* Determine which physical register, swizzle, and mask a virtual
81*61046927SAndroid Build Coastguard Worker  * register corresponds to */
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker static struct phys_reg
index_to_reg(compiler_context * ctx,struct lcra_state * l,unsigned reg,unsigned shift)84*61046927SAndroid Build Coastguard Worker index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg,
85*61046927SAndroid Build Coastguard Worker              unsigned shift)
86*61046927SAndroid Build Coastguard Worker {
87*61046927SAndroid Build Coastguard Worker    /* Check for special cases */
88*61046927SAndroid Build Coastguard Worker    if (reg == ~0)
89*61046927SAndroid Build Coastguard Worker       return default_phys_reg(REGISTER_UNUSED, shift);
90*61046927SAndroid Build Coastguard Worker    else if (reg >= SSA_FIXED_MINIMUM)
91*61046927SAndroid Build Coastguard Worker       return default_phys_reg(SSA_REG_FROM_FIXED(reg), shift);
92*61046927SAndroid Build Coastguard Worker    else if (!l)
93*61046927SAndroid Build Coastguard Worker       return default_phys_reg(REGISTER_UNUSED, shift);
94*61046927SAndroid Build Coastguard Worker 
95*61046927SAndroid Build Coastguard Worker    struct phys_reg r = {
96*61046927SAndroid Build Coastguard Worker       .reg = l->solutions[reg] / 16,
97*61046927SAndroid Build Coastguard Worker       .offset = l->solutions[reg] & 0xF,
98*61046927SAndroid Build Coastguard Worker       .shift = shift,
99*61046927SAndroid Build Coastguard Worker    };
100*61046927SAndroid Build Coastguard Worker 
101*61046927SAndroid Build Coastguard Worker    /* Report that we actually use this register, and return it */
102*61046927SAndroid Build Coastguard Worker 
103*61046927SAndroid Build Coastguard Worker    if (r.reg < 16)
104*61046927SAndroid Build Coastguard Worker       ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1);
105*61046927SAndroid Build Coastguard Worker 
106*61046927SAndroid Build Coastguard Worker    return r;
107*61046927SAndroid Build Coastguard Worker }
108*61046927SAndroid Build Coastguard Worker 
109*61046927SAndroid Build Coastguard Worker static void
set_class(unsigned * classes,unsigned node,unsigned class)110*61046927SAndroid Build Coastguard Worker set_class(unsigned *classes, unsigned node, unsigned class)
111*61046927SAndroid Build Coastguard Worker {
112*61046927SAndroid Build Coastguard Worker    if (node < SSA_FIXED_MINIMUM && class != classes[node]) {
113*61046927SAndroid Build Coastguard Worker       assert(classes[node] == REG_CLASS_WORK);
114*61046927SAndroid Build Coastguard Worker       classes[node] = class;
115*61046927SAndroid Build Coastguard Worker    }
116*61046927SAndroid Build Coastguard Worker }
117*61046927SAndroid Build Coastguard Worker 
118*61046927SAndroid Build Coastguard Worker /* Special register classes impose special constraints on who can read their
119*61046927SAndroid Build Coastguard Worker  * values, so check that */
120*61046927SAndroid Build Coastguard Worker 
121*61046927SAndroid Build Coastguard Worker static bool ASSERTED
check_read_class(unsigned * classes,unsigned tag,unsigned node)122*61046927SAndroid Build Coastguard Worker check_read_class(unsigned *classes, unsigned tag, unsigned node)
123*61046927SAndroid Build Coastguard Worker {
124*61046927SAndroid Build Coastguard Worker    /* Non-nodes are implicitly ok */
125*61046927SAndroid Build Coastguard Worker    if (node >= SSA_FIXED_MINIMUM)
126*61046927SAndroid Build Coastguard Worker       return true;
127*61046927SAndroid Build Coastguard Worker 
128*61046927SAndroid Build Coastguard Worker    switch (classes[node]) {
129*61046927SAndroid Build Coastguard Worker    case REG_CLASS_LDST:
130*61046927SAndroid Build Coastguard Worker       return (tag == TAG_LOAD_STORE_4);
131*61046927SAndroid Build Coastguard Worker    case REG_CLASS_TEXR:
132*61046927SAndroid Build Coastguard Worker       return (tag == TAG_TEXTURE_4);
133*61046927SAndroid Build Coastguard Worker    case REG_CLASS_TEXW:
134*61046927SAndroid Build Coastguard Worker       return (tag != TAG_LOAD_STORE_4);
135*61046927SAndroid Build Coastguard Worker    case REG_CLASS_WORK:
136*61046927SAndroid Build Coastguard Worker       return IS_ALU(tag);
137*61046927SAndroid Build Coastguard Worker    default:
138*61046927SAndroid Build Coastguard Worker       unreachable("Invalid class");
139*61046927SAndroid Build Coastguard Worker    }
140*61046927SAndroid Build Coastguard Worker }
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker static bool ASSERTED
check_write_class(unsigned * classes,unsigned tag,unsigned node)143*61046927SAndroid Build Coastguard Worker check_write_class(unsigned *classes, unsigned tag, unsigned node)
144*61046927SAndroid Build Coastguard Worker {
145*61046927SAndroid Build Coastguard Worker    /* Non-nodes are implicitly ok */
146*61046927SAndroid Build Coastguard Worker    if (node >= SSA_FIXED_MINIMUM)
147*61046927SAndroid Build Coastguard Worker       return true;
148*61046927SAndroid Build Coastguard Worker 
149*61046927SAndroid Build Coastguard Worker    switch (classes[node]) {
150*61046927SAndroid Build Coastguard Worker    case REG_CLASS_TEXR:
151*61046927SAndroid Build Coastguard Worker       return true;
152*61046927SAndroid Build Coastguard Worker    case REG_CLASS_TEXW:
153*61046927SAndroid Build Coastguard Worker       return (tag == TAG_TEXTURE_4);
154*61046927SAndroid Build Coastguard Worker    case REG_CLASS_LDST:
155*61046927SAndroid Build Coastguard Worker    case REG_CLASS_WORK:
156*61046927SAndroid Build Coastguard Worker       return IS_ALU(tag) || (tag == TAG_LOAD_STORE_4);
157*61046927SAndroid Build Coastguard Worker    default:
158*61046927SAndroid Build Coastguard Worker       unreachable("Invalid class");
159*61046927SAndroid Build Coastguard Worker    }
160*61046927SAndroid Build Coastguard Worker }
161*61046927SAndroid Build Coastguard Worker 
162*61046927SAndroid Build Coastguard Worker /* Prepass before RA to ensure special class restrictions are met. The idea is
163*61046927SAndroid Build Coastguard Worker  * to create a bit field of types of instructions that read a particular index.
164*61046927SAndroid Build Coastguard Worker  * Later, we'll add moves as appropriate and rewrite to specialize by type. */
165*61046927SAndroid Build Coastguard Worker 
166*61046927SAndroid Build Coastguard Worker static void
mark_node_class(unsigned * bitfield,unsigned node)167*61046927SAndroid Build Coastguard Worker mark_node_class(unsigned *bitfield, unsigned node)
168*61046927SAndroid Build Coastguard Worker {
169*61046927SAndroid Build Coastguard Worker    if (node < SSA_FIXED_MINIMUM)
170*61046927SAndroid Build Coastguard Worker       BITSET_SET(bitfield, node);
171*61046927SAndroid Build Coastguard Worker }
172*61046927SAndroid Build Coastguard Worker 
173*61046927SAndroid Build Coastguard Worker void
mir_lower_special_reads(compiler_context * ctx)174*61046927SAndroid Build Coastguard Worker mir_lower_special_reads(compiler_context *ctx)
175*61046927SAndroid Build Coastguard Worker {
176*61046927SAndroid Build Coastguard Worker    mir_compute_temp_count(ctx);
177*61046927SAndroid Build Coastguard Worker    size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD);
178*61046927SAndroid Build Coastguard Worker 
179*61046927SAndroid Build Coastguard Worker    /* Bitfields for the various types of registers we could have. aluw can
180*61046927SAndroid Build Coastguard Worker     * be written by either ALU or load/store */
181*61046927SAndroid Build Coastguard Worker 
182*61046927SAndroid Build Coastguard Worker    unsigned *alur = calloc(sz, 1);
183*61046927SAndroid Build Coastguard Worker    unsigned *aluw = calloc(sz, 1);
184*61046927SAndroid Build Coastguard Worker    unsigned *brar = calloc(sz, 1);
185*61046927SAndroid Build Coastguard Worker    unsigned *ldst = calloc(sz, 1);
186*61046927SAndroid Build Coastguard Worker    unsigned *texr = calloc(sz, 1);
187*61046927SAndroid Build Coastguard Worker    unsigned *texw = calloc(sz, 1);
188*61046927SAndroid Build Coastguard Worker 
189*61046927SAndroid Build Coastguard Worker    /* Pass #1 is analysis, a linear scan to fill out the bitfields */
190*61046927SAndroid Build Coastguard Worker 
191*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
192*61046927SAndroid Build Coastguard Worker       switch (ins->type) {
193*61046927SAndroid Build Coastguard Worker       case TAG_ALU_4:
194*61046927SAndroid Build Coastguard Worker          mark_node_class(aluw, ins->dest);
195*61046927SAndroid Build Coastguard Worker          mark_node_class(alur, ins->src[0]);
196*61046927SAndroid Build Coastguard Worker          mark_node_class(alur, ins->src[1]);
197*61046927SAndroid Build Coastguard Worker          mark_node_class(alur, ins->src[2]);
198*61046927SAndroid Build Coastguard Worker 
199*61046927SAndroid Build Coastguard Worker          if (ins->compact_branch && ins->writeout)
200*61046927SAndroid Build Coastguard Worker             mark_node_class(brar, ins->src[0]);
201*61046927SAndroid Build Coastguard Worker 
202*61046927SAndroid Build Coastguard Worker          break;
203*61046927SAndroid Build Coastguard Worker 
204*61046927SAndroid Build Coastguard Worker       case TAG_LOAD_STORE_4:
205*61046927SAndroid Build Coastguard Worker          mark_node_class(aluw, ins->dest);
206*61046927SAndroid Build Coastguard Worker          mark_node_class(ldst, ins->src[0]);
207*61046927SAndroid Build Coastguard Worker          mark_node_class(ldst, ins->src[1]);
208*61046927SAndroid Build Coastguard Worker          mark_node_class(ldst, ins->src[2]);
209*61046927SAndroid Build Coastguard Worker          mark_node_class(ldst, ins->src[3]);
210*61046927SAndroid Build Coastguard Worker          break;
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker       case TAG_TEXTURE_4:
213*61046927SAndroid Build Coastguard Worker          mark_node_class(texr, ins->src[0]);
214*61046927SAndroid Build Coastguard Worker          mark_node_class(texr, ins->src[1]);
215*61046927SAndroid Build Coastguard Worker          mark_node_class(texr, ins->src[2]);
216*61046927SAndroid Build Coastguard Worker          mark_node_class(texw, ins->dest);
217*61046927SAndroid Build Coastguard Worker          break;
218*61046927SAndroid Build Coastguard Worker 
219*61046927SAndroid Build Coastguard Worker       default:
220*61046927SAndroid Build Coastguard Worker          break;
221*61046927SAndroid Build Coastguard Worker       }
222*61046927SAndroid Build Coastguard Worker    }
223*61046927SAndroid Build Coastguard Worker 
224*61046927SAndroid Build Coastguard Worker    /* Pass #2 is lowering now that we've analyzed all the classes.
225*61046927SAndroid Build Coastguard Worker     * Conceptually, if an index is only marked for a single type of use,
226*61046927SAndroid Build Coastguard Worker     * there is nothing to lower. If it is marked for different uses, we
227*61046927SAndroid Build Coastguard Worker     * split up based on the number of types of uses. To do so, we divide
228*61046927SAndroid Build Coastguard Worker     * into N distinct classes of use (where N>1 by definition), emit N-1
229*61046927SAndroid Build Coastguard Worker     * moves from the index to copies of the index, and finally rewrite N-1
230*61046927SAndroid Build Coastguard Worker     * of the types of uses to use the corresponding move */
231*61046927SAndroid Build Coastguard Worker 
232*61046927SAndroid Build Coastguard Worker    unsigned spill_idx = ctx->temp_count;
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->temp_count; ++i) {
235*61046927SAndroid Build Coastguard Worker       bool is_alur = BITSET_TEST(alur, i);
236*61046927SAndroid Build Coastguard Worker       bool is_aluw = BITSET_TEST(aluw, i);
237*61046927SAndroid Build Coastguard Worker       bool is_brar = BITSET_TEST(brar, i);
238*61046927SAndroid Build Coastguard Worker       bool is_ldst = BITSET_TEST(ldst, i);
239*61046927SAndroid Build Coastguard Worker       bool is_texr = BITSET_TEST(texr, i);
240*61046927SAndroid Build Coastguard Worker       bool is_texw = BITSET_TEST(texw, i);
241*61046927SAndroid Build Coastguard Worker 
242*61046927SAndroid Build Coastguard Worker       /* Analyse to check how many distinct uses there are. ALU ops
243*61046927SAndroid Build Coastguard Worker        * (alur) can read the results of the texture pipeline (texw)
244*61046927SAndroid Build Coastguard Worker        * but not ldst or texr. Load/store ops (ldst) cannot read
245*61046927SAndroid Build Coastguard Worker        * anything but load/store inputs. Texture pipeline cannot read
246*61046927SAndroid Build Coastguard Worker        * anything but texture inputs. TODO: Simplify.  */
247*61046927SAndroid Build Coastguard Worker 
248*61046927SAndroid Build Coastguard Worker       bool collision = (is_alur && (is_ldst || is_texr)) ||
249*61046927SAndroid Build Coastguard Worker                        (is_ldst && (is_alur || is_texr || is_texw)) ||
250*61046927SAndroid Build Coastguard Worker                        (is_texr && (is_alur || is_ldst || is_texw)) ||
251*61046927SAndroid Build Coastguard Worker                        (is_texw && (is_aluw || is_ldst || is_texr)) ||
252*61046927SAndroid Build Coastguard Worker                        (is_brar && is_texw);
253*61046927SAndroid Build Coastguard Worker 
254*61046927SAndroid Build Coastguard Worker       if (!collision)
255*61046927SAndroid Build Coastguard Worker          continue;
256*61046927SAndroid Build Coastguard Worker 
257*61046927SAndroid Build Coastguard Worker       /* Use the index as-is as the work copy. Emit copies for
258*61046927SAndroid Build Coastguard Worker        * special uses */
259*61046927SAndroid Build Coastguard Worker 
260*61046927SAndroid Build Coastguard Worker       unsigned classes[] = {TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4,
261*61046927SAndroid Build Coastguard Worker                             TAG_ALU_4};
262*61046927SAndroid Build Coastguard Worker       bool collisions[] = {is_ldst, is_texr, is_texw && is_aluw, is_brar};
263*61046927SAndroid Build Coastguard Worker 
264*61046927SAndroid Build Coastguard Worker       for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) {
265*61046927SAndroid Build Coastguard Worker          if (!collisions[j])
266*61046927SAndroid Build Coastguard Worker             continue;
267*61046927SAndroid Build Coastguard Worker 
268*61046927SAndroid Build Coastguard Worker          /* When the hazard is from reading, we move and rewrite
269*61046927SAndroid Build Coastguard Worker           * sources (typical case). When it's from writing, we
270*61046927SAndroid Build Coastguard Worker           * flip the move and rewrite destinations (obscure,
271*61046927SAndroid Build Coastguard Worker           * only from control flow -- impossible in SSA) */
272*61046927SAndroid Build Coastguard Worker 
273*61046927SAndroid Build Coastguard Worker          bool hazard_write = (j == 2);
274*61046927SAndroid Build Coastguard Worker 
275*61046927SAndroid Build Coastguard Worker          unsigned idx = spill_idx++;
276*61046927SAndroid Build Coastguard Worker 
277*61046927SAndroid Build Coastguard Worker          /* Insert move before each read/write, depending on the
278*61046927SAndroid Build Coastguard Worker           * hazard we're trying to account for */
279*61046927SAndroid Build Coastguard Worker 
280*61046927SAndroid Build Coastguard Worker          mir_foreach_block(ctx, block_) {
281*61046927SAndroid Build Coastguard Worker             midgard_block *block = (midgard_block *)block_;
282*61046927SAndroid Build Coastguard Worker             midgard_instruction *mov = NULL;
283*61046927SAndroid Build Coastguard Worker 
284*61046927SAndroid Build Coastguard Worker             mir_foreach_instr_in_block_safe(block, pre_use) {
285*61046927SAndroid Build Coastguard Worker                if (pre_use->type != classes[j])
286*61046927SAndroid Build Coastguard Worker                   continue;
287*61046927SAndroid Build Coastguard Worker 
288*61046927SAndroid Build Coastguard Worker                if (hazard_write) {
289*61046927SAndroid Build Coastguard Worker                   if (pre_use->dest != i)
290*61046927SAndroid Build Coastguard Worker                      continue;
291*61046927SAndroid Build Coastguard Worker 
292*61046927SAndroid Build Coastguard Worker                   midgard_instruction m = v_mov(idx, i);
293*61046927SAndroid Build Coastguard Worker                   m.dest_type = pre_use->dest_type;
294*61046927SAndroid Build Coastguard Worker                   m.src_types[1] = m.dest_type;
295*61046927SAndroid Build Coastguard Worker                   m.mask = pre_use->mask;
296*61046927SAndroid Build Coastguard Worker 
297*61046927SAndroid Build Coastguard Worker                   midgard_instruction *use = mir_next_op(pre_use);
298*61046927SAndroid Build Coastguard Worker                   assert(use);
299*61046927SAndroid Build Coastguard Worker                   mir_insert_instruction_before(ctx, use, m);
300*61046927SAndroid Build Coastguard Worker                   mir_rewrite_index_dst_single(pre_use, i, idx);
301*61046927SAndroid Build Coastguard Worker                } else {
302*61046927SAndroid Build Coastguard Worker                   if (!mir_has_arg(pre_use, i))
303*61046927SAndroid Build Coastguard Worker                      continue;
304*61046927SAndroid Build Coastguard Worker 
305*61046927SAndroid Build Coastguard Worker                   unsigned mask = mir_from_bytemask(
306*61046927SAndroid Build Coastguard Worker                      mir_round_bytemask_up(
307*61046927SAndroid Build Coastguard Worker                         mir_bytemask_of_read_components(pre_use, i), 32),
308*61046927SAndroid Build Coastguard Worker                      32);
309*61046927SAndroid Build Coastguard Worker 
310*61046927SAndroid Build Coastguard Worker                   if (mov == NULL || !mir_is_ssa(i)) {
311*61046927SAndroid Build Coastguard Worker                      midgard_instruction m = v_mov(i, spill_idx++);
312*61046927SAndroid Build Coastguard Worker                      m.mask = mask;
313*61046927SAndroid Build Coastguard Worker                      mov = mir_insert_instruction_before(ctx, pre_use, m);
314*61046927SAndroid Build Coastguard Worker                   } else {
315*61046927SAndroid Build Coastguard Worker                      mov->mask |= mask;
316*61046927SAndroid Build Coastguard Worker                   }
317*61046927SAndroid Build Coastguard Worker 
318*61046927SAndroid Build Coastguard Worker                   mir_rewrite_index_src_single(pre_use, i, mov->dest);
319*61046927SAndroid Build Coastguard Worker                }
320*61046927SAndroid Build Coastguard Worker             }
321*61046927SAndroid Build Coastguard Worker          }
322*61046927SAndroid Build Coastguard Worker       }
323*61046927SAndroid Build Coastguard Worker    }
324*61046927SAndroid Build Coastguard Worker 
325*61046927SAndroid Build Coastguard Worker    free(alur);
326*61046927SAndroid Build Coastguard Worker    free(aluw);
327*61046927SAndroid Build Coastguard Worker    free(brar);
328*61046927SAndroid Build Coastguard Worker    free(ldst);
329*61046927SAndroid Build Coastguard Worker    free(texr);
330*61046927SAndroid Build Coastguard Worker    free(texw);
331*61046927SAndroid Build Coastguard Worker }
332*61046927SAndroid Build Coastguard Worker 
333*61046927SAndroid Build Coastguard Worker static void
mir_compute_interference(compiler_context * ctx,struct lcra_state * l)334*61046927SAndroid Build Coastguard Worker mir_compute_interference(compiler_context *ctx, struct lcra_state *l)
335*61046927SAndroid Build Coastguard Worker {
336*61046927SAndroid Build Coastguard Worker    /* First, we need liveness information to be computed per block */
337*61046927SAndroid Build Coastguard Worker    mir_compute_liveness(ctx);
338*61046927SAndroid Build Coastguard Worker 
339*61046927SAndroid Build Coastguard Worker    /* We need to force r1.w live throughout a blend shader */
340*61046927SAndroid Build Coastguard Worker 
341*61046927SAndroid Build Coastguard Worker    if (ctx->inputs->is_blend) {
342*61046927SAndroid Build Coastguard Worker       unsigned r1w = ~0;
343*61046927SAndroid Build Coastguard Worker 
344*61046927SAndroid Build Coastguard Worker       mir_foreach_block(ctx, _block) {
345*61046927SAndroid Build Coastguard Worker          midgard_block *block = (midgard_block *)_block;
346*61046927SAndroid Build Coastguard Worker          mir_foreach_instr_in_block_rev(block, ins) {
347*61046927SAndroid Build Coastguard Worker             if (ins->writeout)
348*61046927SAndroid Build Coastguard Worker                r1w = ins->dest;
349*61046927SAndroid Build Coastguard Worker          }
350*61046927SAndroid Build Coastguard Worker 
351*61046927SAndroid Build Coastguard Worker          if (r1w != ~0)
352*61046927SAndroid Build Coastguard Worker             break;
353*61046927SAndroid Build Coastguard Worker       }
354*61046927SAndroid Build Coastguard Worker 
355*61046927SAndroid Build Coastguard Worker       mir_foreach_instr_global(ctx, ins) {
356*61046927SAndroid Build Coastguard Worker          if (ins->dest < ctx->temp_count)
357*61046927SAndroid Build Coastguard Worker             lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w,
358*61046927SAndroid Build Coastguard Worker                                        0xF);
359*61046927SAndroid Build Coastguard Worker       }
360*61046927SAndroid Build Coastguard Worker    }
361*61046927SAndroid Build Coastguard Worker 
362*61046927SAndroid Build Coastguard Worker    /* Now that every block has live_in/live_out computed, we can determine
363*61046927SAndroid Build Coastguard Worker     * interference by walking each block linearly. Take live_out at the
364*61046927SAndroid Build Coastguard Worker     * end of each block and walk the block backwards. */
365*61046927SAndroid Build Coastguard Worker 
366*61046927SAndroid Build Coastguard Worker    mir_foreach_block(ctx, _blk) {
367*61046927SAndroid Build Coastguard Worker       midgard_block *blk = (midgard_block *)_blk;
368*61046927SAndroid Build Coastguard Worker 
369*61046927SAndroid Build Coastguard Worker       /* The scalar and vector units run in parallel. We need to make
370*61046927SAndroid Build Coastguard Worker        * sure they don't write to same portion of the register file
371*61046927SAndroid Build Coastguard Worker        * otherwise the result is undefined. Add interferences to
372*61046927SAndroid Build Coastguard Worker        * avoid this situation.
373*61046927SAndroid Build Coastguard Worker        */
374*61046927SAndroid Build Coastguard Worker       util_dynarray_foreach(&blk->bundles, midgard_bundle, bundle) {
375*61046927SAndroid Build Coastguard Worker          midgard_instruction *instrs[2][4];
376*61046927SAndroid Build Coastguard Worker          unsigned instr_count[2] = {0, 0};
377*61046927SAndroid Build Coastguard Worker 
378*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < bundle->instruction_count; i++) {
379*61046927SAndroid Build Coastguard Worker             if (bundle->instructions[i]->unit == UNIT_VMUL ||
380*61046927SAndroid Build Coastguard Worker                 bundle->instructions[i]->unit == UNIT_SADD)
381*61046927SAndroid Build Coastguard Worker                instrs[0][instr_count[0]++] = bundle->instructions[i];
382*61046927SAndroid Build Coastguard Worker             else
383*61046927SAndroid Build Coastguard Worker                instrs[1][instr_count[1]++] = bundle->instructions[i];
384*61046927SAndroid Build Coastguard Worker          }
385*61046927SAndroid Build Coastguard Worker 
386*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < ARRAY_SIZE(instr_count); i++) {
387*61046927SAndroid Build Coastguard Worker             for (unsigned j = 0; j < instr_count[i]; j++) {
388*61046927SAndroid Build Coastguard Worker                midgard_instruction *ins_a = instrs[i][j];
389*61046927SAndroid Build Coastguard Worker 
390*61046927SAndroid Build Coastguard Worker                if (ins_a->dest >= ctx->temp_count)
391*61046927SAndroid Build Coastguard Worker                   continue;
392*61046927SAndroid Build Coastguard Worker 
393*61046927SAndroid Build Coastguard Worker                for (unsigned k = j + 1; k < instr_count[i]; k++) {
394*61046927SAndroid Build Coastguard Worker                   midgard_instruction *ins_b = instrs[i][k];
395*61046927SAndroid Build Coastguard Worker 
396*61046927SAndroid Build Coastguard Worker                   if (ins_b->dest >= ctx->temp_count)
397*61046927SAndroid Build Coastguard Worker                      continue;
398*61046927SAndroid Build Coastguard Worker 
399*61046927SAndroid Build Coastguard Worker                   lcra_add_node_interference(l, ins_b->dest,
400*61046927SAndroid Build Coastguard Worker                                              mir_bytemask(ins_b), ins_a->dest,
401*61046927SAndroid Build Coastguard Worker                                              mir_bytemask(ins_a));
402*61046927SAndroid Build Coastguard Worker                }
403*61046927SAndroid Build Coastguard Worker             }
404*61046927SAndroid Build Coastguard Worker          }
405*61046927SAndroid Build Coastguard Worker       }
406*61046927SAndroid Build Coastguard Worker 
407*61046927SAndroid Build Coastguard Worker       uint16_t *live =
408*61046927SAndroid Build Coastguard Worker          mem_dup(_blk->live_out, ctx->temp_count * sizeof(uint16_t));
409*61046927SAndroid Build Coastguard Worker 
410*61046927SAndroid Build Coastguard Worker       mir_foreach_instr_in_block_rev(blk, ins) {
411*61046927SAndroid Build Coastguard Worker          /* Mark all registers live after the instruction as
412*61046927SAndroid Build Coastguard Worker           * interfering with the destination */
413*61046927SAndroid Build Coastguard Worker 
414*61046927SAndroid Build Coastguard Worker          unsigned dest = ins->dest;
415*61046927SAndroid Build Coastguard Worker 
416*61046927SAndroid Build Coastguard Worker          if (dest < ctx->temp_count) {
417*61046927SAndroid Build Coastguard Worker             for (unsigned i = 0; i < ctx->temp_count; ++i) {
418*61046927SAndroid Build Coastguard Worker                if (live[i]) {
419*61046927SAndroid Build Coastguard Worker                   unsigned mask = mir_bytemask(ins);
420*61046927SAndroid Build Coastguard Worker                   lcra_add_node_interference(l, dest, mask, i, live[i]);
421*61046927SAndroid Build Coastguard Worker                }
422*61046927SAndroid Build Coastguard Worker             }
423*61046927SAndroid Build Coastguard Worker          }
424*61046927SAndroid Build Coastguard Worker 
425*61046927SAndroid Build Coastguard Worker          /* Add blend shader interference: blend shaders might
426*61046927SAndroid Build Coastguard Worker           * clobber r0-r3. */
427*61046927SAndroid Build Coastguard Worker          if (ins->compact_branch && ins->writeout) {
428*61046927SAndroid Build Coastguard Worker             for (unsigned i = 0; i < ctx->temp_count; ++i) {
429*61046927SAndroid Build Coastguard Worker                if (!live[i])
430*61046927SAndroid Build Coastguard Worker                   continue;
431*61046927SAndroid Build Coastguard Worker 
432*61046927SAndroid Build Coastguard Worker                for (unsigned j = 0; j < 4; j++) {
433*61046927SAndroid Build Coastguard Worker                   lcra_add_node_interference(l, ctx->temp_count + j, 0xFFFF, i,
434*61046927SAndroid Build Coastguard Worker                                              live[i]);
435*61046927SAndroid Build Coastguard Worker                }
436*61046927SAndroid Build Coastguard Worker             }
437*61046927SAndroid Build Coastguard Worker          }
438*61046927SAndroid Build Coastguard Worker 
439*61046927SAndroid Build Coastguard Worker          /* Update live_in */
440*61046927SAndroid Build Coastguard Worker          mir_liveness_ins_update(live, ins, ctx->temp_count);
441*61046927SAndroid Build Coastguard Worker       }
442*61046927SAndroid Build Coastguard Worker 
443*61046927SAndroid Build Coastguard Worker       free(live);
444*61046927SAndroid Build Coastguard Worker    }
445*61046927SAndroid Build Coastguard Worker }
446*61046927SAndroid Build Coastguard Worker 
447*61046927SAndroid Build Coastguard Worker static bool
mir_is_64(midgard_instruction * ins)448*61046927SAndroid Build Coastguard Worker mir_is_64(midgard_instruction *ins)
449*61046927SAndroid Build Coastguard Worker {
450*61046927SAndroid Build Coastguard Worker    if (nir_alu_type_get_type_size(ins->dest_type) == 64)
451*61046927SAndroid Build Coastguard Worker       return true;
452*61046927SAndroid Build Coastguard Worker 
453*61046927SAndroid Build Coastguard Worker    mir_foreach_src(ins, v) {
454*61046927SAndroid Build Coastguard Worker       if (nir_alu_type_get_type_size(ins->src_types[v]) == 64)
455*61046927SAndroid Build Coastguard Worker          return true;
456*61046927SAndroid Build Coastguard Worker    }
457*61046927SAndroid Build Coastguard Worker 
458*61046927SAndroid Build Coastguard Worker    return false;
459*61046927SAndroid Build Coastguard Worker }
460*61046927SAndroid Build Coastguard Worker 
461*61046927SAndroid Build Coastguard Worker /*
462*61046927SAndroid Build Coastguard Worker  * Determine if a shader needs a contiguous workgroup. This impacts register
463*61046927SAndroid Build Coastguard Worker  * allocation. TODO: Optimize if barriers and local memory are unused.
464*61046927SAndroid Build Coastguard Worker  */
465*61046927SAndroid Build Coastguard Worker static bool
needs_contiguous_workgroup(compiler_context * ctx)466*61046927SAndroid Build Coastguard Worker needs_contiguous_workgroup(compiler_context *ctx)
467*61046927SAndroid Build Coastguard Worker {
468*61046927SAndroid Build Coastguard Worker    return gl_shader_stage_uses_workgroup(ctx->stage);
469*61046927SAndroid Build Coastguard Worker }
470*61046927SAndroid Build Coastguard Worker 
471*61046927SAndroid Build Coastguard Worker /*
472*61046927SAndroid Build Coastguard Worker  * Determine an upper-bound on the number of threads in a workgroup. The GL
473*61046927SAndroid Build Coastguard Worker  * driver reports 128 for the maximum number of threads (the minimum-maximum in
474*61046927SAndroid Build Coastguard Worker  * OpenGL ES 3.1), so we pessimistically assume 128 threads for variable
475*61046927SAndroid Build Coastguard Worker  * workgroups.
476*61046927SAndroid Build Coastguard Worker  */
477*61046927SAndroid Build Coastguard Worker static unsigned
max_threads_per_workgroup(compiler_context * ctx)478*61046927SAndroid Build Coastguard Worker max_threads_per_workgroup(compiler_context *ctx)
479*61046927SAndroid Build Coastguard Worker {
480*61046927SAndroid Build Coastguard Worker    if (ctx->nir->info.workgroup_size_variable) {
481*61046927SAndroid Build Coastguard Worker       return 128;
482*61046927SAndroid Build Coastguard Worker    } else {
483*61046927SAndroid Build Coastguard Worker       return ctx->nir->info.workgroup_size[0] *
484*61046927SAndroid Build Coastguard Worker              ctx->nir->info.workgroup_size[1] *
485*61046927SAndroid Build Coastguard Worker              ctx->nir->info.workgroup_size[2];
486*61046927SAndroid Build Coastguard Worker    }
487*61046927SAndroid Build Coastguard Worker }
488*61046927SAndroid Build Coastguard Worker 
489*61046927SAndroid Build Coastguard Worker /*
490*61046927SAndroid Build Coastguard Worker  * Calculate the maximum number of work registers available to the shader.
491*61046927SAndroid Build Coastguard Worker  * Architecturally, Midgard shaders may address up to 16 work registers, but
492*61046927SAndroid Build Coastguard Worker  * various features impose other limits:
493*61046927SAndroid Build Coastguard Worker  *
494*61046927SAndroid Build Coastguard Worker  * 1. Blend shaders are limited to 8 registers by ABI.
495*61046927SAndroid Build Coastguard Worker  * 2. If there are more than 8 register-mapped uniforms, then additional
496*61046927SAndroid Build Coastguard Worker  *    register-mapped uniforms use space that otherwise would be used for work
497*61046927SAndroid Build Coastguard Worker  *    registers.
498*61046927SAndroid Build Coastguard Worker  * 3. If more than 4 registers are used, at most 128 threads may be spawned. If
499*61046927SAndroid Build Coastguard Worker  *    more than 8 registers are used, at most 64 threads may be spawned. These
500*61046927SAndroid Build Coastguard Worker  *    limits are architecturally visible in compute kernels that require an
501*61046927SAndroid Build Coastguard Worker  *    entire workgroup to be spawned at once (for barriers or local memory to
502*61046927SAndroid Build Coastguard Worker  *    work properly).
503*61046927SAndroid Build Coastguard Worker  */
504*61046927SAndroid Build Coastguard Worker static unsigned
max_work_registers(compiler_context * ctx)505*61046927SAndroid Build Coastguard Worker max_work_registers(compiler_context *ctx)
506*61046927SAndroid Build Coastguard Worker {
507*61046927SAndroid Build Coastguard Worker    if (ctx->inputs->is_blend)
508*61046927SAndroid Build Coastguard Worker       return 8;
509*61046927SAndroid Build Coastguard Worker 
510*61046927SAndroid Build Coastguard Worker    unsigned rmu_vec4 = ctx->info->push.count / 4;
511*61046927SAndroid Build Coastguard Worker    unsigned max_work_registers = (rmu_vec4 >= 8) ? (24 - rmu_vec4) : 16;
512*61046927SAndroid Build Coastguard Worker 
513*61046927SAndroid Build Coastguard Worker    if (needs_contiguous_workgroup(ctx)) {
514*61046927SAndroid Build Coastguard Worker       unsigned threads = max_threads_per_workgroup(ctx);
515*61046927SAndroid Build Coastguard Worker       assert(threads <= 128 && "maximum threads in ABI exceeded");
516*61046927SAndroid Build Coastguard Worker 
517*61046927SAndroid Build Coastguard Worker       if (threads > 64)
518*61046927SAndroid Build Coastguard Worker          max_work_registers = MIN2(max_work_registers, 8);
519*61046927SAndroid Build Coastguard Worker    }
520*61046927SAndroid Build Coastguard Worker 
521*61046927SAndroid Build Coastguard Worker    return max_work_registers;
522*61046927SAndroid Build Coastguard Worker }
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker /* This routine performs the actual register allocation. It should be succeeded
525*61046927SAndroid Build Coastguard Worker  * by install_registers */
526*61046927SAndroid Build Coastguard Worker 
527*61046927SAndroid Build Coastguard Worker static struct lcra_state *
allocate_registers(compiler_context * ctx,bool * spilled)528*61046927SAndroid Build Coastguard Worker allocate_registers(compiler_context *ctx, bool *spilled)
529*61046927SAndroid Build Coastguard Worker {
530*61046927SAndroid Build Coastguard Worker    int work_count = max_work_registers(ctx);
531*61046927SAndroid Build Coastguard Worker 
532*61046927SAndroid Build Coastguard Worker    /* No register allocation to do with no SSA */
533*61046927SAndroid Build Coastguard Worker    mir_compute_temp_count(ctx);
534*61046927SAndroid Build Coastguard Worker    if (!ctx->temp_count)
535*61046927SAndroid Build Coastguard Worker       return NULL;
536*61046927SAndroid Build Coastguard Worker 
537*61046927SAndroid Build Coastguard Worker    /* Initialize LCRA. Allocate extra node at the end for r1-r3 for
538*61046927SAndroid Build Coastguard Worker     * interference */
539*61046927SAndroid Build Coastguard Worker 
540*61046927SAndroid Build Coastguard Worker    struct lcra_state *l = lcra_alloc_equations(ctx->temp_count + 4, 5);
541*61046927SAndroid Build Coastguard Worker    unsigned node_r1 = ctx->temp_count + 1;
542*61046927SAndroid Build Coastguard Worker 
543*61046927SAndroid Build Coastguard Worker    /* Starts of classes, in bytes */
544*61046927SAndroid Build Coastguard Worker    l->class_start[REG_CLASS_WORK] = 16 * 0;
545*61046927SAndroid Build Coastguard Worker    l->class_start[REG_CLASS_LDST] = 16 * 26;
546*61046927SAndroid Build Coastguard Worker    l->class_start[REG_CLASS_TEXR] = 16 * 28;
547*61046927SAndroid Build Coastguard Worker    l->class_start[REG_CLASS_TEXW] = 16 * 28;
548*61046927SAndroid Build Coastguard Worker 
549*61046927SAndroid Build Coastguard Worker    l->class_size[REG_CLASS_WORK] = 16 * work_count;
550*61046927SAndroid Build Coastguard Worker    l->class_size[REG_CLASS_LDST] = 16 * 2;
551*61046927SAndroid Build Coastguard Worker    l->class_size[REG_CLASS_TEXR] = 16 * 2;
552*61046927SAndroid Build Coastguard Worker    l->class_size[REG_CLASS_TEXW] = 16 * 2;
553*61046927SAndroid Build Coastguard Worker 
554*61046927SAndroid Build Coastguard Worker    lcra_set_disjoint_class(l, REG_CLASS_TEXR, REG_CLASS_TEXW);
555*61046927SAndroid Build Coastguard Worker 
556*61046927SAndroid Build Coastguard Worker    /* To save space on T*20, we don't have real texture registers.
557*61046927SAndroid Build Coastguard Worker     * Instead, tex inputs reuse the load/store pipeline registers, and
558*61046927SAndroid Build Coastguard Worker     * tex outputs use work r0/r1. Note we still use TEXR/TEXW classes,
559*61046927SAndroid Build Coastguard Worker     * noting that this handles interferences and sizes correctly. */
560*61046927SAndroid Build Coastguard Worker 
561*61046927SAndroid Build Coastguard Worker    if (ctx->quirks & MIDGARD_INTERPIPE_REG_ALIASING) {
562*61046927SAndroid Build Coastguard Worker       l->class_start[REG_CLASS_TEXR] = l->class_start[REG_CLASS_LDST];
563*61046927SAndroid Build Coastguard Worker       l->class_start[REG_CLASS_TEXW] = l->class_start[REG_CLASS_WORK];
564*61046927SAndroid Build Coastguard Worker    }
565*61046927SAndroid Build Coastguard Worker 
566*61046927SAndroid Build Coastguard Worker    unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count);
567*61046927SAndroid Build Coastguard Worker    unsigned *min_alignment = calloc(sizeof(unsigned), ctx->temp_count);
568*61046927SAndroid Build Coastguard Worker    unsigned *min_bound = calloc(sizeof(unsigned), ctx->temp_count);
569*61046927SAndroid Build Coastguard Worker 
570*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
571*61046927SAndroid Build Coastguard Worker       /* Swizzles of 32-bit sources on 64-bit instructions need to be
572*61046927SAndroid Build Coastguard Worker        * aligned to either bottom (xy) or top (zw). More general
573*61046927SAndroid Build Coastguard Worker        * swizzle lowering should happen prior to scheduling (TODO),
574*61046927SAndroid Build Coastguard Worker        * but once we get RA we shouldn't disrupt this further. Align
575*61046927SAndroid Build Coastguard Worker        * sources of 64-bit instructions. */
576*61046927SAndroid Build Coastguard Worker 
577*61046927SAndroid Build Coastguard Worker       if (ins->type == TAG_ALU_4 && mir_is_64(ins)) {
578*61046927SAndroid Build Coastguard Worker          mir_foreach_src(ins, v) {
579*61046927SAndroid Build Coastguard Worker             unsigned s = ins->src[v];
580*61046927SAndroid Build Coastguard Worker 
581*61046927SAndroid Build Coastguard Worker             if (s < ctx->temp_count)
582*61046927SAndroid Build Coastguard Worker                min_alignment[s] = MAX2(3, min_alignment[s]);
583*61046927SAndroid Build Coastguard Worker          }
584*61046927SAndroid Build Coastguard Worker       }
585*61046927SAndroid Build Coastguard Worker 
586*61046927SAndroid Build Coastguard Worker       if (ins->type == TAG_LOAD_STORE_4 && OP_HAS_ADDRESS(ins->op)) {
587*61046927SAndroid Build Coastguard Worker          mir_foreach_src(ins, v) {
588*61046927SAndroid Build Coastguard Worker             unsigned s = ins->src[v];
589*61046927SAndroid Build Coastguard Worker             unsigned size = nir_alu_type_get_type_size(ins->src_types[v]);
590*61046927SAndroid Build Coastguard Worker 
591*61046927SAndroid Build Coastguard Worker             if (s < ctx->temp_count)
592*61046927SAndroid Build Coastguard Worker                min_alignment[s] = MAX2((size == 64) ? 3 : 2, min_alignment[s]);
593*61046927SAndroid Build Coastguard Worker          }
594*61046927SAndroid Build Coastguard Worker       }
595*61046927SAndroid Build Coastguard Worker 
596*61046927SAndroid Build Coastguard Worker       /* Anything read as 16-bit needs proper alignment to ensure the
597*61046927SAndroid Build Coastguard Worker        * resulting code can be packed.
598*61046927SAndroid Build Coastguard Worker        */
599*61046927SAndroid Build Coastguard Worker       mir_foreach_src(ins, s) {
600*61046927SAndroid Build Coastguard Worker          unsigned src_size = nir_alu_type_get_type_size(ins->src_types[s]);
601*61046927SAndroid Build Coastguard Worker          if (src_size == 16 && ins->src[s] < SSA_FIXED_MINIMUM)
602*61046927SAndroid Build Coastguard Worker             min_bound[ins->src[s]] = MAX2(min_bound[ins->src[s]], 8);
603*61046927SAndroid Build Coastguard Worker       }
604*61046927SAndroid Build Coastguard Worker 
605*61046927SAndroid Build Coastguard Worker       /* Everything after this concerns only the destination, not the
606*61046927SAndroid Build Coastguard Worker        * sources.
607*61046927SAndroid Build Coastguard Worker        */
608*61046927SAndroid Build Coastguard Worker       if (ins->dest >= SSA_FIXED_MINIMUM)
609*61046927SAndroid Build Coastguard Worker          continue;
610*61046927SAndroid Build Coastguard Worker 
611*61046927SAndroid Build Coastguard Worker       unsigned size = nir_alu_type_get_type_size(ins->dest_type);
612*61046927SAndroid Build Coastguard Worker 
613*61046927SAndroid Build Coastguard Worker       if (ins->is_pack)
614*61046927SAndroid Build Coastguard Worker          size = 32;
615*61046927SAndroid Build Coastguard Worker 
616*61046927SAndroid Build Coastguard Worker       /* 0 for x, 1 for xy, 2 for xyz, 3 for xyzw */
617*61046927SAndroid Build Coastguard Worker       int comps1 = util_logbase2(ins->mask);
618*61046927SAndroid Build Coastguard Worker 
619*61046927SAndroid Build Coastguard Worker       int bytes = (comps1 + 1) * (size / 8);
620*61046927SAndroid Build Coastguard Worker 
621*61046927SAndroid Build Coastguard Worker       /* Use the largest class if there's ambiguity, this
622*61046927SAndroid Build Coastguard Worker        * handles partial writes */
623*61046927SAndroid Build Coastguard Worker 
624*61046927SAndroid Build Coastguard Worker       int dest = ins->dest;
625*61046927SAndroid Build Coastguard Worker       found_class[dest] = MAX2(found_class[dest], bytes);
626*61046927SAndroid Build Coastguard Worker 
627*61046927SAndroid Build Coastguard Worker       min_alignment[dest] =
628*61046927SAndroid Build Coastguard Worker          MAX2(min_alignment[dest], (size == 16) ? 1 : /* (1 << 1) = 2-byte */
629*61046927SAndroid Build Coastguard Worker                                       (size == 32) ? 2
630*61046927SAndroid Build Coastguard Worker                                                    : /* (1 << 2) = 4-byte */
631*61046927SAndroid Build Coastguard Worker                                       (size == 64) ? 3
632*61046927SAndroid Build Coastguard Worker                                                    : /* (1 << 3) = 8-byte */
633*61046927SAndroid Build Coastguard Worker                                       3);            /* 8-bit todo */
634*61046927SAndroid Build Coastguard Worker 
635*61046927SAndroid Build Coastguard Worker       /* We can't cross xy/zw boundaries. TODO: vec8 can */
636*61046927SAndroid Build Coastguard Worker       if (size == 16 && min_alignment[dest] != 4)
637*61046927SAndroid Build Coastguard Worker          min_bound[dest] = 8;
638*61046927SAndroid Build Coastguard Worker 
639*61046927SAndroid Build Coastguard Worker       /* We don't have a swizzle for the conditional and we don't
640*61046927SAndroid Build Coastguard Worker        * want to muck with the conditional itself, so just force
641*61046927SAndroid Build Coastguard Worker        * alignment for now */
642*61046927SAndroid Build Coastguard Worker 
643*61046927SAndroid Build Coastguard Worker       if (ins->type == TAG_ALU_4 && OP_IS_CSEL_V(ins->op)) {
644*61046927SAndroid Build Coastguard Worker          min_alignment[dest] = 4; /* 1 << 4= 16-byte = vec4 */
645*61046927SAndroid Build Coastguard Worker 
646*61046927SAndroid Build Coastguard Worker          /* LCRA assumes bound >= alignment */
647*61046927SAndroid Build Coastguard Worker          min_bound[dest] = 16;
648*61046927SAndroid Build Coastguard Worker       }
649*61046927SAndroid Build Coastguard Worker 
650*61046927SAndroid Build Coastguard Worker       /* Since ld/st swizzles and masks are 32-bit only, we need them
651*61046927SAndroid Build Coastguard Worker        * aligned to enable final packing */
652*61046927SAndroid Build Coastguard Worker       if (ins->type == TAG_LOAD_STORE_4)
653*61046927SAndroid Build Coastguard Worker          min_alignment[dest] = MAX2(min_alignment[dest], 2);
654*61046927SAndroid Build Coastguard Worker    }
655*61046927SAndroid Build Coastguard Worker 
656*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->temp_count; ++i) {
657*61046927SAndroid Build Coastguard Worker       lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2,
658*61046927SAndroid Build Coastguard Worker                          min_bound[i] ? min_bound[i] : 16);
659*61046927SAndroid Build Coastguard Worker       lcra_restrict_range(l, i, found_class[i]);
660*61046927SAndroid Build Coastguard Worker    }
661*61046927SAndroid Build Coastguard Worker 
662*61046927SAndroid Build Coastguard Worker    free(found_class);
663*61046927SAndroid Build Coastguard Worker    free(min_alignment);
664*61046927SAndroid Build Coastguard Worker    free(min_bound);
665*61046927SAndroid Build Coastguard Worker 
666*61046927SAndroid Build Coastguard Worker    /* Next, we'll determine semantic class. We default to zero (work).
667*61046927SAndroid Build Coastguard Worker     * But, if we're used with a special operation, that will force us to a
668*61046927SAndroid Build Coastguard Worker     * particular class. Each node must be assigned to exactly one class; a
669*61046927SAndroid Build Coastguard Worker     * prepass before RA should have lowered what-would-have-been
670*61046927SAndroid Build Coastguard Worker     * multiclass nodes into a series of moves to break it up into multiple
671*61046927SAndroid Build Coastguard Worker     * nodes (TODO) */
672*61046927SAndroid Build Coastguard Worker 
673*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
674*61046927SAndroid Build Coastguard Worker       /* Check if this operation imposes any classes */
675*61046927SAndroid Build Coastguard Worker 
676*61046927SAndroid Build Coastguard Worker       if (ins->type == TAG_LOAD_STORE_4) {
677*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[0], REG_CLASS_LDST);
678*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[1], REG_CLASS_LDST);
679*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[2], REG_CLASS_LDST);
680*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[3], REG_CLASS_LDST);
681*61046927SAndroid Build Coastguard Worker 
682*61046927SAndroid Build Coastguard Worker          if (OP_IS_VEC4_ONLY(ins->op)) {
683*61046927SAndroid Build Coastguard Worker             lcra_restrict_range(l, ins->dest, 16);
684*61046927SAndroid Build Coastguard Worker             lcra_restrict_range(l, ins->src[0], 16);
685*61046927SAndroid Build Coastguard Worker             lcra_restrict_range(l, ins->src[1], 16);
686*61046927SAndroid Build Coastguard Worker             lcra_restrict_range(l, ins->src[2], 16);
687*61046927SAndroid Build Coastguard Worker             lcra_restrict_range(l, ins->src[3], 16);
688*61046927SAndroid Build Coastguard Worker          }
689*61046927SAndroid Build Coastguard Worker       } else if (ins->type == TAG_TEXTURE_4) {
690*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->dest, REG_CLASS_TEXW);
691*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[0], REG_CLASS_TEXR);
692*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[1], REG_CLASS_TEXR);
693*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[2], REG_CLASS_TEXR);
694*61046927SAndroid Build Coastguard Worker          set_class(l->class, ins->src[3], REG_CLASS_TEXR);
695*61046927SAndroid Build Coastguard Worker       }
696*61046927SAndroid Build Coastguard Worker    }
697*61046927SAndroid Build Coastguard Worker 
698*61046927SAndroid Build Coastguard Worker    /* Check that the semantics of the class are respected */
699*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
700*61046927SAndroid Build Coastguard Worker       assert(check_write_class(l->class, ins->type, ins->dest));
701*61046927SAndroid Build Coastguard Worker       assert(check_read_class(l->class, ins->type, ins->src[0]));
702*61046927SAndroid Build Coastguard Worker       assert(check_read_class(l->class, ins->type, ins->src[1]));
703*61046927SAndroid Build Coastguard Worker       assert(check_read_class(l->class, ins->type, ins->src[2]));
704*61046927SAndroid Build Coastguard Worker       assert(check_read_class(l->class, ins->type, ins->src[3]));
705*61046927SAndroid Build Coastguard Worker    }
706*61046927SAndroid Build Coastguard Worker 
707*61046927SAndroid Build Coastguard Worker    /* Mark writeout to r0, depth to r1.x, stencil to r1.y,
708*61046927SAndroid Build Coastguard Worker     * render target to r1.z, unknown to r1.w */
709*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
710*61046927SAndroid Build Coastguard Worker       if (!(ins->compact_branch && ins->writeout))
711*61046927SAndroid Build Coastguard Worker          continue;
712*61046927SAndroid Build Coastguard Worker 
713*61046927SAndroid Build Coastguard Worker       if (ins->src[0] < ctx->temp_count)
714*61046927SAndroid Build Coastguard Worker          l->solutions[ins->src[0]] = 0;
715*61046927SAndroid Build Coastguard Worker 
716*61046927SAndroid Build Coastguard Worker       if (ins->src[2] < ctx->temp_count)
717*61046927SAndroid Build Coastguard Worker          l->solutions[ins->src[2]] = (16 * 1) + COMPONENT_X * 4;
718*61046927SAndroid Build Coastguard Worker 
719*61046927SAndroid Build Coastguard Worker       if (ins->src[3] < ctx->temp_count)
720*61046927SAndroid Build Coastguard Worker          l->solutions[ins->src[3]] = (16 * 1) + COMPONENT_Y * 4;
721*61046927SAndroid Build Coastguard Worker 
722*61046927SAndroid Build Coastguard Worker       if (ins->src[1] < ctx->temp_count)
723*61046927SAndroid Build Coastguard Worker          l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;
724*61046927SAndroid Build Coastguard Worker 
725*61046927SAndroid Build Coastguard Worker       if (ins->dest < ctx->temp_count)
726*61046927SAndroid Build Coastguard Worker          l->solutions[ins->dest] = (16 * 1) + COMPONENT_W * 4;
727*61046927SAndroid Build Coastguard Worker    }
728*61046927SAndroid Build Coastguard Worker 
729*61046927SAndroid Build Coastguard Worker    /* Destinations of instructions in a writeout block cannot be assigned
730*61046927SAndroid Build Coastguard Worker     * to r1 unless they are actually used as r1 from the writeout itself,
731*61046927SAndroid Build Coastguard Worker     * since the writes to r1 are special. A code sequence like:
732*61046927SAndroid Build Coastguard Worker     *
733*61046927SAndroid Build Coastguard Worker     *      sadd.fmov r1.x, [...]
734*61046927SAndroid Build Coastguard Worker     *      vadd.fadd r0, r1, r2
735*61046927SAndroid Build Coastguard Worker     *      [writeout branch]
736*61046927SAndroid Build Coastguard Worker     *
737*61046927SAndroid Build Coastguard Worker     * will misbehave since the r1.x write will be interpreted as a
738*61046927SAndroid Build Coastguard Worker     * gl_FragDepth write so it won't show up correctly when r1 is read in
739*61046927SAndroid Build Coastguard Worker     * the following segment. We model this as interference.
740*61046927SAndroid Build Coastguard Worker     */
741*61046927SAndroid Build Coastguard Worker 
742*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 4; ++i)
743*61046927SAndroid Build Coastguard Worker       l->solutions[ctx->temp_count + i] = (16 * i);
744*61046927SAndroid Build Coastguard Worker 
745*61046927SAndroid Build Coastguard Worker    mir_foreach_block(ctx, _blk) {
746*61046927SAndroid Build Coastguard Worker       midgard_block *blk = (midgard_block *)_blk;
747*61046927SAndroid Build Coastguard Worker 
748*61046927SAndroid Build Coastguard Worker       mir_foreach_bundle_in_block(blk, v) {
749*61046927SAndroid Build Coastguard Worker          /* We need at least a writeout and nonwriteout instruction */
750*61046927SAndroid Build Coastguard Worker          if (v->instruction_count < 2)
751*61046927SAndroid Build Coastguard Worker             continue;
752*61046927SAndroid Build Coastguard Worker 
753*61046927SAndroid Build Coastguard Worker          /* Branches always come at the end */
754*61046927SAndroid Build Coastguard Worker          midgard_instruction *br = v->instructions[v->instruction_count - 1];
755*61046927SAndroid Build Coastguard Worker 
756*61046927SAndroid Build Coastguard Worker          if (!br->writeout)
757*61046927SAndroid Build Coastguard Worker             continue;
758*61046927SAndroid Build Coastguard Worker 
759*61046927SAndroid Build Coastguard Worker          for (signed i = v->instruction_count - 2; i >= 0; --i) {
760*61046927SAndroid Build Coastguard Worker             midgard_instruction *ins = v->instructions[i];
761*61046927SAndroid Build Coastguard Worker 
762*61046927SAndroid Build Coastguard Worker             if (ins->dest >= ctx->temp_count)
763*61046927SAndroid Build Coastguard Worker                continue;
764*61046927SAndroid Build Coastguard Worker 
765*61046927SAndroid Build Coastguard Worker             bool used_as_r1 = (br->dest == ins->dest);
766*61046927SAndroid Build Coastguard Worker 
767*61046927SAndroid Build Coastguard Worker             mir_foreach_src(br, s)
768*61046927SAndroid Build Coastguard Worker                used_as_r1 |= (s > 0) && (br->src[s] == ins->dest);
769*61046927SAndroid Build Coastguard Worker 
770*61046927SAndroid Build Coastguard Worker             if (!used_as_r1)
771*61046927SAndroid Build Coastguard Worker                lcra_add_node_interference(l, ins->dest, mir_bytemask(ins),
772*61046927SAndroid Build Coastguard Worker                                           node_r1, 0xFFFF);
773*61046927SAndroid Build Coastguard Worker          }
774*61046927SAndroid Build Coastguard Worker       }
775*61046927SAndroid Build Coastguard Worker    }
776*61046927SAndroid Build Coastguard Worker 
777*61046927SAndroid Build Coastguard Worker    /* Precolour blend input to r0. Note writeout is necessarily at the end
778*61046927SAndroid Build Coastguard Worker     * and blend shaders are single-RT only so there is only a single
779*61046927SAndroid Build Coastguard Worker     * writeout block, so this cannot conflict with the writeout r0 (there
780*61046927SAndroid Build Coastguard Worker     * is no need to have an intermediate move) */
781*61046927SAndroid Build Coastguard Worker 
782*61046927SAndroid Build Coastguard Worker    if (ctx->blend_input != ~0) {
783*61046927SAndroid Build Coastguard Worker       assert(ctx->blend_input < ctx->temp_count);
784*61046927SAndroid Build Coastguard Worker       l->solutions[ctx->blend_input] = 0;
785*61046927SAndroid Build Coastguard Worker    }
786*61046927SAndroid Build Coastguard Worker 
787*61046927SAndroid Build Coastguard Worker    /* Same for the dual-source blend input/output, except here we use r2,
788*61046927SAndroid Build Coastguard Worker     * which is also set in the fragment shader. */
789*61046927SAndroid Build Coastguard Worker 
790*61046927SAndroid Build Coastguard Worker    if (ctx->blend_src1 != ~0) {
791*61046927SAndroid Build Coastguard Worker       assert(ctx->blend_src1 < ctx->temp_count);
792*61046927SAndroid Build Coastguard Worker       l->solutions[ctx->blend_src1] = (16 * 2);
793*61046927SAndroid Build Coastguard Worker       ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3);
794*61046927SAndroid Build Coastguard Worker    }
795*61046927SAndroid Build Coastguard Worker 
796*61046927SAndroid Build Coastguard Worker    mir_compute_interference(ctx, l);
797*61046927SAndroid Build Coastguard Worker 
798*61046927SAndroid Build Coastguard Worker    *spilled = !lcra_solve(l);
799*61046927SAndroid Build Coastguard Worker    return l;
800*61046927SAndroid Build Coastguard Worker }
801*61046927SAndroid Build Coastguard Worker 
802*61046927SAndroid Build Coastguard Worker /* Once registers have been decided via register allocation
803*61046927SAndroid Build Coastguard Worker  * (allocate_registers), we need to rewrite the MIR to use registers instead of
804*61046927SAndroid Build Coastguard Worker  * indices */
805*61046927SAndroid Build Coastguard Worker 
806*61046927SAndroid Build Coastguard Worker static void
install_registers_instr(compiler_context * ctx,struct lcra_state * l,midgard_instruction * ins)807*61046927SAndroid Build Coastguard Worker install_registers_instr(compiler_context *ctx, struct lcra_state *l,
808*61046927SAndroid Build Coastguard Worker                         midgard_instruction *ins)
809*61046927SAndroid Build Coastguard Worker {
810*61046927SAndroid Build Coastguard Worker    unsigned src_shift[MIR_SRC_COUNT];
811*61046927SAndroid Build Coastguard Worker 
812*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < MIR_SRC_COUNT; ++i) {
813*61046927SAndroid Build Coastguard Worker       src_shift[i] =
814*61046927SAndroid Build Coastguard Worker          util_logbase2(nir_alu_type_get_type_size(ins->src_types[i]) / 8);
815*61046927SAndroid Build Coastguard Worker    }
816*61046927SAndroid Build Coastguard Worker 
817*61046927SAndroid Build Coastguard Worker    unsigned dest_shift =
818*61046927SAndroid Build Coastguard Worker       util_logbase2(nir_alu_type_get_type_size(ins->dest_type) / 8);
819*61046927SAndroid Build Coastguard Worker 
820*61046927SAndroid Build Coastguard Worker    switch (ins->type) {
821*61046927SAndroid Build Coastguard Worker    case TAG_ALU_4:
822*61046927SAndroid Build Coastguard Worker    case TAG_ALU_8:
823*61046927SAndroid Build Coastguard Worker    case TAG_ALU_12:
824*61046927SAndroid Build Coastguard Worker    case TAG_ALU_16: {
825*61046927SAndroid Build Coastguard Worker       if (ins->compact_branch)
826*61046927SAndroid Build Coastguard Worker          return;
827*61046927SAndroid Build Coastguard Worker 
828*61046927SAndroid Build Coastguard Worker       struct phys_reg src1 = index_to_reg(ctx, l, ins->src[0], src_shift[0]);
829*61046927SAndroid Build Coastguard Worker       struct phys_reg src2 = index_to_reg(ctx, l, ins->src[1], src_shift[1]);
830*61046927SAndroid Build Coastguard Worker       struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift);
831*61046927SAndroid Build Coastguard Worker 
832*61046927SAndroid Build Coastguard Worker       mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
833*61046927SAndroid Build Coastguard Worker 
834*61046927SAndroid Build Coastguard Worker       unsigned dest_offset =
835*61046927SAndroid Build Coastguard Worker          GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props) ? 0 : dest.offset;
836*61046927SAndroid Build Coastguard Worker 
837*61046927SAndroid Build Coastguard Worker       offset_swizzle(ins->swizzle[0], src1.offset, src1.shift, dest.shift,
838*61046927SAndroid Build Coastguard Worker                      dest_offset);
839*61046927SAndroid Build Coastguard Worker       if (!ins->has_inline_constant)
840*61046927SAndroid Build Coastguard Worker          offset_swizzle(ins->swizzle[1], src2.offset, src2.shift, dest.shift,
841*61046927SAndroid Build Coastguard Worker                         dest_offset);
842*61046927SAndroid Build Coastguard Worker       if (ins->src[0] != ~0)
843*61046927SAndroid Build Coastguard Worker          ins->src[0] = SSA_FIXED_REGISTER(src1.reg);
844*61046927SAndroid Build Coastguard Worker       if (ins->src[1] != ~0)
845*61046927SAndroid Build Coastguard Worker          ins->src[1] = SSA_FIXED_REGISTER(src2.reg);
846*61046927SAndroid Build Coastguard Worker       if (ins->dest != ~0)
847*61046927SAndroid Build Coastguard Worker          ins->dest = SSA_FIXED_REGISTER(dest.reg);
848*61046927SAndroid Build Coastguard Worker       break;
849*61046927SAndroid Build Coastguard Worker    }
850*61046927SAndroid Build Coastguard Worker 
851*61046927SAndroid Build Coastguard Worker    case TAG_LOAD_STORE_4: {
852*61046927SAndroid Build Coastguard Worker       /* Which physical register we read off depends on
853*61046927SAndroid Build Coastguard Worker        * whether we are loading or storing -- think about the
854*61046927SAndroid Build Coastguard Worker        * logical dataflow */
855*61046927SAndroid Build Coastguard Worker 
856*61046927SAndroid Build Coastguard Worker       bool encodes_src = OP_IS_STORE(ins->op);
857*61046927SAndroid Build Coastguard Worker 
858*61046927SAndroid Build Coastguard Worker       if (encodes_src) {
859*61046927SAndroid Build Coastguard Worker          struct phys_reg src = index_to_reg(ctx, l, ins->src[0], src_shift[0]);
860*61046927SAndroid Build Coastguard Worker          assert(src.reg == 26 || src.reg == 27);
861*61046927SAndroid Build Coastguard Worker 
862*61046927SAndroid Build Coastguard Worker          ins->src[0] = SSA_FIXED_REGISTER(src.reg);
863*61046927SAndroid Build Coastguard Worker          offset_swizzle(ins->swizzle[0], src.offset, src.shift, 0, 0);
864*61046927SAndroid Build Coastguard Worker       } else {
865*61046927SAndroid Build Coastguard Worker          struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift);
866*61046927SAndroid Build Coastguard Worker 
867*61046927SAndroid Build Coastguard Worker          ins->dest = SSA_FIXED_REGISTER(dst.reg);
868*61046927SAndroid Build Coastguard Worker          offset_swizzle(ins->swizzle[0], 0, 2, dest_shift, dst.offset);
869*61046927SAndroid Build Coastguard Worker          mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset);
870*61046927SAndroid Build Coastguard Worker       }
871*61046927SAndroid Build Coastguard Worker 
872*61046927SAndroid Build Coastguard Worker       /* We also follow up by actual arguments */
873*61046927SAndroid Build Coastguard Worker 
874*61046927SAndroid Build Coastguard Worker       for (int i = 1; i <= 3; i++) {
875*61046927SAndroid Build Coastguard Worker          unsigned src_index = ins->src[i];
876*61046927SAndroid Build Coastguard Worker          if (src_index != ~0) {
877*61046927SAndroid Build Coastguard Worker             struct phys_reg src = index_to_reg(ctx, l, src_index, src_shift[i]);
878*61046927SAndroid Build Coastguard Worker             unsigned component = src.offset >> src.shift;
879*61046927SAndroid Build Coastguard Worker             assert(component << src.shift == src.offset);
880*61046927SAndroid Build Coastguard Worker             ins->src[i] = SSA_FIXED_REGISTER(src.reg);
881*61046927SAndroid Build Coastguard Worker             ins->swizzle[i][0] += component;
882*61046927SAndroid Build Coastguard Worker          }
883*61046927SAndroid Build Coastguard Worker       }
884*61046927SAndroid Build Coastguard Worker 
885*61046927SAndroid Build Coastguard Worker       break;
886*61046927SAndroid Build Coastguard Worker    }
887*61046927SAndroid Build Coastguard Worker 
888*61046927SAndroid Build Coastguard Worker    case TAG_TEXTURE_4: {
889*61046927SAndroid Build Coastguard Worker       if (ins->op == midgard_tex_op_barrier)
890*61046927SAndroid Build Coastguard Worker          break;
891*61046927SAndroid Build Coastguard Worker 
892*61046927SAndroid Build Coastguard Worker       /* Grab RA results */
893*61046927SAndroid Build Coastguard Worker       struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift);
894*61046927SAndroid Build Coastguard Worker       struct phys_reg coord = index_to_reg(ctx, l, ins->src[1], src_shift[1]);
895*61046927SAndroid Build Coastguard Worker       struct phys_reg lod = index_to_reg(ctx, l, ins->src[2], src_shift[2]);
896*61046927SAndroid Build Coastguard Worker       struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_shift[3]);
897*61046927SAndroid Build Coastguard Worker 
898*61046927SAndroid Build Coastguard Worker       /* First, install the texture coordinate */
899*61046927SAndroid Build Coastguard Worker       if (ins->src[1] != ~0)
900*61046927SAndroid Build Coastguard Worker          ins->src[1] = SSA_FIXED_REGISTER(coord.reg);
901*61046927SAndroid Build Coastguard Worker       offset_swizzle(ins->swizzle[1], coord.offset, coord.shift, dest.shift, 0);
902*61046927SAndroid Build Coastguard Worker 
903*61046927SAndroid Build Coastguard Worker       /* Next, install the destination */
904*61046927SAndroid Build Coastguard Worker       if (ins->dest != ~0)
905*61046927SAndroid Build Coastguard Worker          ins->dest = SSA_FIXED_REGISTER(dest.reg);
906*61046927SAndroid Build Coastguard Worker       offset_swizzle(ins->swizzle[0], 0, 2, dest.shift,
907*61046927SAndroid Build Coastguard Worker                      dest_shift == 1 ? dest.offset % 8 : dest.offset);
908*61046927SAndroid Build Coastguard Worker       mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
909*61046927SAndroid Build Coastguard Worker 
910*61046927SAndroid Build Coastguard Worker       /* If there is a register LOD/bias, use it */
911*61046927SAndroid Build Coastguard Worker       if (ins->src[2] != ~0) {
912*61046927SAndroid Build Coastguard Worker          assert(!(lod.offset & 3));
913*61046927SAndroid Build Coastguard Worker          ins->src[2] = SSA_FIXED_REGISTER(lod.reg);
914*61046927SAndroid Build Coastguard Worker          ins->swizzle[2][0] = lod.offset / 4;
915*61046927SAndroid Build Coastguard Worker       }
916*61046927SAndroid Build Coastguard Worker 
917*61046927SAndroid Build Coastguard Worker       /* If there is an offset register, install it */
918*61046927SAndroid Build Coastguard Worker       if (ins->src[3] != ~0) {
919*61046927SAndroid Build Coastguard Worker          ins->src[3] = SSA_FIXED_REGISTER(offset.reg);
920*61046927SAndroid Build Coastguard Worker          ins->swizzle[3][0] = offset.offset / 4;
921*61046927SAndroid Build Coastguard Worker       }
922*61046927SAndroid Build Coastguard Worker 
923*61046927SAndroid Build Coastguard Worker       break;
924*61046927SAndroid Build Coastguard Worker    }
925*61046927SAndroid Build Coastguard Worker 
926*61046927SAndroid Build Coastguard Worker    default:
927*61046927SAndroid Build Coastguard Worker       break;
928*61046927SAndroid Build Coastguard Worker    }
929*61046927SAndroid Build Coastguard Worker }
930*61046927SAndroid Build Coastguard Worker 
931*61046927SAndroid Build Coastguard Worker static void
install_registers(compiler_context * ctx,struct lcra_state * l)932*61046927SAndroid Build Coastguard Worker install_registers(compiler_context *ctx, struct lcra_state *l)
933*61046927SAndroid Build Coastguard Worker {
934*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins)
935*61046927SAndroid Build Coastguard Worker       install_registers_instr(ctx, l, ins);
936*61046927SAndroid Build Coastguard Worker }
937*61046927SAndroid Build Coastguard Worker 
938*61046927SAndroid Build Coastguard Worker /* If register allocation fails, find the best spill node */
939*61046927SAndroid Build Coastguard Worker 
940*61046927SAndroid Build Coastguard Worker static signed
mir_choose_spill_node(compiler_context * ctx,struct lcra_state * l)941*61046927SAndroid Build Coastguard Worker mir_choose_spill_node(compiler_context *ctx, struct lcra_state *l)
942*61046927SAndroid Build Coastguard Worker {
943*61046927SAndroid Build Coastguard Worker    /* We can't spill a previously spilled value or an unspill */
944*61046927SAndroid Build Coastguard Worker 
945*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
946*61046927SAndroid Build Coastguard Worker       if (ins->no_spill & (1 << l->spill_class)) {
947*61046927SAndroid Build Coastguard Worker          lcra_set_node_spill_cost(l, ins->dest, -1);
948*61046927SAndroid Build Coastguard Worker 
949*61046927SAndroid Build Coastguard Worker          if (l->spill_class != REG_CLASS_WORK) {
950*61046927SAndroid Build Coastguard Worker             mir_foreach_src(ins, s)
951*61046927SAndroid Build Coastguard Worker                lcra_set_node_spill_cost(l, ins->src[s], -1);
952*61046927SAndroid Build Coastguard Worker          }
953*61046927SAndroid Build Coastguard Worker       }
954*61046927SAndroid Build Coastguard Worker    }
955*61046927SAndroid Build Coastguard Worker 
956*61046927SAndroid Build Coastguard Worker    return lcra_get_best_spill_node(l);
957*61046927SAndroid Build Coastguard Worker }
958*61046927SAndroid Build Coastguard Worker 
959*61046927SAndroid Build Coastguard Worker /* Once we've chosen a spill node, spill it */
960*61046927SAndroid Build Coastguard Worker 
961*61046927SAndroid Build Coastguard Worker static void
mir_spill_register(compiler_context * ctx,unsigned spill_node,unsigned spill_class,unsigned * spill_count)962*61046927SAndroid Build Coastguard Worker mir_spill_register(compiler_context *ctx, unsigned spill_node,
963*61046927SAndroid Build Coastguard Worker                    unsigned spill_class, unsigned *spill_count)
964*61046927SAndroid Build Coastguard Worker {
965*61046927SAndroid Build Coastguard Worker    if (spill_class == REG_CLASS_WORK && ctx->inputs->is_blend)
966*61046927SAndroid Build Coastguard Worker       unreachable("Blend shader spilling is currently unimplemented");
967*61046927SAndroid Build Coastguard Worker 
968*61046927SAndroid Build Coastguard Worker    unsigned spill_index = ctx->temp_count;
969*61046927SAndroid Build Coastguard Worker 
970*61046927SAndroid Build Coastguard Worker    /* We have a spill node, so check the class. Work registers
971*61046927SAndroid Build Coastguard Worker     * legitimately spill to TLS, but special registers just spill to work
972*61046927SAndroid Build Coastguard Worker     * registers */
973*61046927SAndroid Build Coastguard Worker 
974*61046927SAndroid Build Coastguard Worker    bool is_special = spill_class != REG_CLASS_WORK;
975*61046927SAndroid Build Coastguard Worker    bool is_special_w = spill_class == REG_CLASS_TEXW;
976*61046927SAndroid Build Coastguard Worker 
977*61046927SAndroid Build Coastguard Worker    /* Allocate TLS slot (maybe) */
978*61046927SAndroid Build Coastguard Worker    unsigned spill_slot = !is_special ? (*spill_count)++ : 0;
979*61046927SAndroid Build Coastguard Worker 
980*61046927SAndroid Build Coastguard Worker    /* For special reads, figure out how many bytes we need */
981*61046927SAndroid Build Coastguard Worker    unsigned read_bytemask = 0;
982*61046927SAndroid Build Coastguard Worker 
983*61046927SAndroid Build Coastguard Worker    /* If multiple instructions write to this destination, we'll have to
984*61046927SAndroid Build Coastguard Worker     * fill from TLS before writing */
985*61046927SAndroid Build Coastguard Worker    unsigned write_count = 0;
986*61046927SAndroid Build Coastguard Worker 
987*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global_safe(ctx, ins) {
988*61046927SAndroid Build Coastguard Worker       read_bytemask |= mir_bytemask_of_read_components(ins, spill_node);
989*61046927SAndroid Build Coastguard Worker       if (ins->dest == spill_node)
990*61046927SAndroid Build Coastguard Worker          ++write_count;
991*61046927SAndroid Build Coastguard Worker    }
992*61046927SAndroid Build Coastguard Worker 
993*61046927SAndroid Build Coastguard Worker    /* For TLS, replace all stores to the spilled node. For
994*61046927SAndroid Build Coastguard Worker     * special reads, just keep as-is; the class will be demoted
995*61046927SAndroid Build Coastguard Worker     * implicitly. For special writes, spill to a work register */
996*61046927SAndroid Build Coastguard Worker 
997*61046927SAndroid Build Coastguard Worker    if (!is_special || is_special_w) {
998*61046927SAndroid Build Coastguard Worker       if (is_special_w)
999*61046927SAndroid Build Coastguard Worker          spill_slot = spill_index++;
1000*61046927SAndroid Build Coastguard Worker 
1001*61046927SAndroid Build Coastguard Worker       unsigned last_id = ~0;
1002*61046927SAndroid Build Coastguard Worker       unsigned last_fill = ~0;
1003*61046927SAndroid Build Coastguard Worker       unsigned last_spill_index = ~0;
1004*61046927SAndroid Build Coastguard Worker       midgard_instruction *last_spill = NULL;
1005*61046927SAndroid Build Coastguard Worker 
1006*61046927SAndroid Build Coastguard Worker       mir_foreach_block(ctx, _block) {
1007*61046927SAndroid Build Coastguard Worker          midgard_block *block = (midgard_block *)_block;
1008*61046927SAndroid Build Coastguard Worker          mir_foreach_instr_in_block_safe(block, ins) {
1009*61046927SAndroid Build Coastguard Worker             if (ins->dest != spill_node)
1010*61046927SAndroid Build Coastguard Worker                continue;
1011*61046927SAndroid Build Coastguard Worker 
1012*61046927SAndroid Build Coastguard Worker             /* Note: it's important to match the mask of the spill
1013*61046927SAndroid Build Coastguard Worker              * with the mask of the instruction whose destination
1014*61046927SAndroid Build Coastguard Worker              * we're spilling, or otherwise we'll read invalid
1015*61046927SAndroid Build Coastguard Worker              * components and can fail RA in a subsequent iteration
1016*61046927SAndroid Build Coastguard Worker              */
1017*61046927SAndroid Build Coastguard Worker 
1018*61046927SAndroid Build Coastguard Worker             if (is_special_w) {
1019*61046927SAndroid Build Coastguard Worker                midgard_instruction st = v_mov(spill_node, spill_slot);
1020*61046927SAndroid Build Coastguard Worker                st.no_spill |= (1 << spill_class);
1021*61046927SAndroid Build Coastguard Worker                st.mask = ins->mask;
1022*61046927SAndroid Build Coastguard Worker                st.dest_type = st.src_types[1] = ins->dest_type;
1023*61046927SAndroid Build Coastguard Worker 
1024*61046927SAndroid Build Coastguard Worker                /* Hint: don't rewrite this node */
1025*61046927SAndroid Build Coastguard Worker                st.hint = true;
1026*61046927SAndroid Build Coastguard Worker 
1027*61046927SAndroid Build Coastguard Worker                mir_insert_instruction_after_scheduled(ctx, block, ins, st);
1028*61046927SAndroid Build Coastguard Worker             } else {
1029*61046927SAndroid Build Coastguard Worker                unsigned bundle = ins->bundle_id;
1030*61046927SAndroid Build Coastguard Worker                unsigned dest =
1031*61046927SAndroid Build Coastguard Worker                   (bundle == last_id) ? last_spill_index : spill_index++;
1032*61046927SAndroid Build Coastguard Worker 
1033*61046927SAndroid Build Coastguard Worker                unsigned bytemask = mir_bytemask(ins);
1034*61046927SAndroid Build Coastguard Worker                unsigned write_mask =
1035*61046927SAndroid Build Coastguard Worker                   mir_from_bytemask(mir_round_bytemask_up(bytemask, 32), 32);
1036*61046927SAndroid Build Coastguard Worker 
1037*61046927SAndroid Build Coastguard Worker                if (write_count > 1 && bytemask != 0xFFFF &&
1038*61046927SAndroid Build Coastguard Worker                    bundle != last_fill) {
1039*61046927SAndroid Build Coastguard Worker                   midgard_instruction read =
1040*61046927SAndroid Build Coastguard Worker                      v_load_store_scratch(dest, spill_slot, false, 0xF);
1041*61046927SAndroid Build Coastguard Worker                   mir_insert_instruction_before_scheduled(ctx, block, ins,
1042*61046927SAndroid Build Coastguard Worker                                                           read);
1043*61046927SAndroid Build Coastguard Worker                   write_mask = 0xF;
1044*61046927SAndroid Build Coastguard Worker                   last_fill = bundle;
1045*61046927SAndroid Build Coastguard Worker                }
1046*61046927SAndroid Build Coastguard Worker 
1047*61046927SAndroid Build Coastguard Worker                ins->dest = dest;
1048*61046927SAndroid Build Coastguard Worker                ins->no_spill |= (1 << spill_class);
1049*61046927SAndroid Build Coastguard Worker 
1050*61046927SAndroid Build Coastguard Worker                bool move = false;
1051*61046927SAndroid Build Coastguard Worker 
1052*61046927SAndroid Build Coastguard Worker                /* In the same bundle, reads of the destination
1053*61046927SAndroid Build Coastguard Worker                 * of the spilt instruction need to be direct */
1054*61046927SAndroid Build Coastguard Worker                midgard_instruction *it = ins;
1055*61046927SAndroid Build Coastguard Worker                while ((it = list_first_entry(&it->link, midgard_instruction,
1056*61046927SAndroid Build Coastguard Worker                                              link)) &&
1057*61046927SAndroid Build Coastguard Worker                       (it->bundle_id == bundle)) {
1058*61046927SAndroid Build Coastguard Worker 
1059*61046927SAndroid Build Coastguard Worker                   if (!mir_has_arg(it, spill_node))
1060*61046927SAndroid Build Coastguard Worker                      continue;
1061*61046927SAndroid Build Coastguard Worker 
1062*61046927SAndroid Build Coastguard Worker                   mir_rewrite_index_src_single(it, spill_node, dest);
1063*61046927SAndroid Build Coastguard Worker 
1064*61046927SAndroid Build Coastguard Worker                   /* The spilt instruction will write to
1065*61046927SAndroid Build Coastguard Worker                    * a work register for `it` to read but
1066*61046927SAndroid Build Coastguard Worker                    * the spill needs an LD/ST register */
1067*61046927SAndroid Build Coastguard Worker                   move = true;
1068*61046927SAndroid Build Coastguard Worker                }
1069*61046927SAndroid Build Coastguard Worker 
1070*61046927SAndroid Build Coastguard Worker                if (move)
1071*61046927SAndroid Build Coastguard Worker                   dest = spill_index++;
1072*61046927SAndroid Build Coastguard Worker 
1073*61046927SAndroid Build Coastguard Worker                if (last_id == bundle) {
1074*61046927SAndroid Build Coastguard Worker                   last_spill->mask |= write_mask;
1075*61046927SAndroid Build Coastguard Worker                   u_foreach_bit(c, write_mask)
1076*61046927SAndroid Build Coastguard Worker                      last_spill->swizzle[0][c] = c;
1077*61046927SAndroid Build Coastguard Worker                } else {
1078*61046927SAndroid Build Coastguard Worker                   midgard_instruction st =
1079*61046927SAndroid Build Coastguard Worker                      v_load_store_scratch(dest, spill_slot, true, write_mask);
1080*61046927SAndroid Build Coastguard Worker                   last_spill = mir_insert_instruction_after_scheduled(
1081*61046927SAndroid Build Coastguard Worker                      ctx, block, ins, st);
1082*61046927SAndroid Build Coastguard Worker                }
1083*61046927SAndroid Build Coastguard Worker 
1084*61046927SAndroid Build Coastguard Worker                if (move) {
1085*61046927SAndroid Build Coastguard Worker                   midgard_instruction mv = v_mov(ins->dest, dest);
1086*61046927SAndroid Build Coastguard Worker                   mv.no_spill |= (1 << spill_class);
1087*61046927SAndroid Build Coastguard Worker 
1088*61046927SAndroid Build Coastguard Worker                   mir_insert_instruction_after_scheduled(ctx, block, ins, mv);
1089*61046927SAndroid Build Coastguard Worker                }
1090*61046927SAndroid Build Coastguard Worker 
1091*61046927SAndroid Build Coastguard Worker                last_id = bundle;
1092*61046927SAndroid Build Coastguard Worker                last_spill_index = ins->dest;
1093*61046927SAndroid Build Coastguard Worker             }
1094*61046927SAndroid Build Coastguard Worker 
1095*61046927SAndroid Build Coastguard Worker             if (!is_special)
1096*61046927SAndroid Build Coastguard Worker                ctx->spills++;
1097*61046927SAndroid Build Coastguard Worker          }
1098*61046927SAndroid Build Coastguard Worker       }
1099*61046927SAndroid Build Coastguard Worker    }
1100*61046927SAndroid Build Coastguard Worker 
1101*61046927SAndroid Build Coastguard Worker    /* Insert a load from TLS before the first consecutive
1102*61046927SAndroid Build Coastguard Worker     * use of the node, rewriting to use spilled indices to
1103*61046927SAndroid Build Coastguard Worker     * break up the live range. Or, for special, insert a
1104*61046927SAndroid Build Coastguard Worker     * move. Ironically the latter *increases* register
1105*61046927SAndroid Build Coastguard Worker     * pressure, but the two uses of the spilling mechanism
1106*61046927SAndroid Build Coastguard Worker     * are somewhat orthogonal. (special spilling is to use
1107*61046927SAndroid Build Coastguard Worker     * work registers to back special registers; TLS
1108*61046927SAndroid Build Coastguard Worker     * spilling is to use memory to back work registers) */
1109*61046927SAndroid Build Coastguard Worker 
1110*61046927SAndroid Build Coastguard Worker    mir_foreach_block(ctx, _block) {
1111*61046927SAndroid Build Coastguard Worker       midgard_block *block = (midgard_block *)_block;
1112*61046927SAndroid Build Coastguard Worker       mir_foreach_instr_in_block(block, ins) {
1113*61046927SAndroid Build Coastguard Worker          /* We can't rewrite the moves used to spill in the
1114*61046927SAndroid Build Coastguard Worker           * first place. These moves are hinted. */
1115*61046927SAndroid Build Coastguard Worker          if (ins->hint)
1116*61046927SAndroid Build Coastguard Worker             continue;
1117*61046927SAndroid Build Coastguard Worker 
1118*61046927SAndroid Build Coastguard Worker          /* If we don't use the spilled value, nothing to do */
1119*61046927SAndroid Build Coastguard Worker          if (!mir_has_arg(ins, spill_node))
1120*61046927SAndroid Build Coastguard Worker             continue;
1121*61046927SAndroid Build Coastguard Worker 
1122*61046927SAndroid Build Coastguard Worker          unsigned index = 0;
1123*61046927SAndroid Build Coastguard Worker 
1124*61046927SAndroid Build Coastguard Worker          if (!is_special_w) {
1125*61046927SAndroid Build Coastguard Worker             index = ++spill_index;
1126*61046927SAndroid Build Coastguard Worker 
1127*61046927SAndroid Build Coastguard Worker             midgard_instruction *before = ins;
1128*61046927SAndroid Build Coastguard Worker             midgard_instruction st;
1129*61046927SAndroid Build Coastguard Worker 
1130*61046927SAndroid Build Coastguard Worker             if (is_special) {
1131*61046927SAndroid Build Coastguard Worker                /* Move */
1132*61046927SAndroid Build Coastguard Worker                st = v_mov(spill_node, index);
1133*61046927SAndroid Build Coastguard Worker                st.no_spill |= (1 << spill_class);
1134*61046927SAndroid Build Coastguard Worker             } else {
1135*61046927SAndroid Build Coastguard Worker                /* TLS load */
1136*61046927SAndroid Build Coastguard Worker                st = v_load_store_scratch(index, spill_slot, false, 0xF);
1137*61046927SAndroid Build Coastguard Worker             }
1138*61046927SAndroid Build Coastguard Worker 
1139*61046927SAndroid Build Coastguard Worker             /* Mask the load based on the component count
1140*61046927SAndroid Build Coastguard Worker              * actually needed to prevent RA loops */
1141*61046927SAndroid Build Coastguard Worker 
1142*61046927SAndroid Build Coastguard Worker             st.mask =
1143*61046927SAndroid Build Coastguard Worker                mir_from_bytemask(mir_round_bytemask_up(read_bytemask, 32), 32);
1144*61046927SAndroid Build Coastguard Worker 
1145*61046927SAndroid Build Coastguard Worker             mir_insert_instruction_before_scheduled(ctx, block, before, st);
1146*61046927SAndroid Build Coastguard Worker          } else {
1147*61046927SAndroid Build Coastguard Worker             /* Special writes already have their move spilled in */
1148*61046927SAndroid Build Coastguard Worker             index = spill_slot;
1149*61046927SAndroid Build Coastguard Worker          }
1150*61046927SAndroid Build Coastguard Worker 
1151*61046927SAndroid Build Coastguard Worker          /* Rewrite to use */
1152*61046927SAndroid Build Coastguard Worker          mir_rewrite_index_src_single(ins, spill_node, index);
1153*61046927SAndroid Build Coastguard Worker 
1154*61046927SAndroid Build Coastguard Worker          if (!is_special)
1155*61046927SAndroid Build Coastguard Worker             ctx->fills++;
1156*61046927SAndroid Build Coastguard Worker       }
1157*61046927SAndroid Build Coastguard Worker    }
1158*61046927SAndroid Build Coastguard Worker 
1159*61046927SAndroid Build Coastguard Worker    /* Reset hints */
1160*61046927SAndroid Build Coastguard Worker 
1161*61046927SAndroid Build Coastguard Worker    mir_foreach_instr_global(ctx, ins) {
1162*61046927SAndroid Build Coastguard Worker       ins->hint = false;
1163*61046927SAndroid Build Coastguard Worker    }
1164*61046927SAndroid Build Coastguard Worker }
1165*61046927SAndroid Build Coastguard Worker 
1166*61046927SAndroid Build Coastguard Worker static void
mir_demote_uniforms(compiler_context * ctx,unsigned new_cutoff)1167*61046927SAndroid Build Coastguard Worker mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
1168*61046927SAndroid Build Coastguard Worker {
1169*61046927SAndroid Build Coastguard Worker    unsigned uniforms = ctx->info->push.count / 4;
1170*61046927SAndroid Build Coastguard Worker    unsigned old_work_count = 16 - MAX2(uniforms - 8, 0);
1171*61046927SAndroid Build Coastguard Worker    unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
1172*61046927SAndroid Build Coastguard Worker 
1173*61046927SAndroid Build Coastguard Worker    unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
1174*61046927SAndroid Build Coastguard Worker    unsigned max_demote = SSA_FIXED_REGISTER(work_count);
1175*61046927SAndroid Build Coastguard Worker 
1176*61046927SAndroid Build Coastguard Worker    mir_foreach_block(ctx, _block) {
1177*61046927SAndroid Build Coastguard Worker       midgard_block *block = (midgard_block *)_block;
1178*61046927SAndroid Build Coastguard Worker       mir_foreach_instr_in_block(block, ins) {
1179*61046927SAndroid Build Coastguard Worker          mir_foreach_src(ins, i) {
1180*61046927SAndroid Build Coastguard Worker             if (ins->src[i] < min_demote || ins->src[i] >= max_demote)
1181*61046927SAndroid Build Coastguard Worker                continue;
1182*61046927SAndroid Build Coastguard Worker 
1183*61046927SAndroid Build Coastguard Worker             midgard_instruction *before = ins;
1184*61046927SAndroid Build Coastguard Worker 
1185*61046927SAndroid Build Coastguard Worker             unsigned temp = make_compiler_temp(ctx);
1186*61046927SAndroid Build Coastguard Worker             unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
1187*61046927SAndroid Build Coastguard Worker             assert(idx < ctx->info->push.count);
1188*61046927SAndroid Build Coastguard Worker 
1189*61046927SAndroid Build Coastguard Worker             ctx->ubo_mask |= BITSET_BIT(ctx->info->push.words[idx].ubo);
1190*61046927SAndroid Build Coastguard Worker 
1191*61046927SAndroid Build Coastguard Worker             midgard_instruction ld = {
1192*61046927SAndroid Build Coastguard Worker                .type = TAG_LOAD_STORE_4,
1193*61046927SAndroid Build Coastguard Worker                .mask = 0xF,
1194*61046927SAndroid Build Coastguard Worker                .dest = temp,
1195*61046927SAndroid Build Coastguard Worker                .dest_type = ins->src_types[i],
1196*61046927SAndroid Build Coastguard Worker                .src = {~0, ~0, ~0, ~0},
1197*61046927SAndroid Build Coastguard Worker                .swizzle = SWIZZLE_IDENTITY_4,
1198*61046927SAndroid Build Coastguard Worker                .op = midgard_op_ld_ubo_128,
1199*61046927SAndroid Build Coastguard Worker                .load_store =
1200*61046927SAndroid Build Coastguard Worker                   {
1201*61046927SAndroid Build Coastguard Worker                      .index_reg = REGISTER_LDST_ZERO,
1202*61046927SAndroid Build Coastguard Worker                   },
1203*61046927SAndroid Build Coastguard Worker                .constants.u32[0] = ctx->info->push.words[idx].offset,
1204*61046927SAndroid Build Coastguard Worker             };
1205*61046927SAndroid Build Coastguard Worker 
1206*61046927SAndroid Build Coastguard Worker             midgard_pack_ubo_index_imm(&ld.load_store,
1207*61046927SAndroid Build Coastguard Worker                                        ctx->info->push.words[idx].ubo);
1208*61046927SAndroid Build Coastguard Worker 
1209*61046927SAndroid Build Coastguard Worker             mir_insert_instruction_before_scheduled(ctx, block, before, ld);
1210*61046927SAndroid Build Coastguard Worker 
1211*61046927SAndroid Build Coastguard Worker             mir_rewrite_index_src_single(ins, ins->src[i], temp);
1212*61046927SAndroid Build Coastguard Worker          }
1213*61046927SAndroid Build Coastguard Worker       }
1214*61046927SAndroid Build Coastguard Worker    }
1215*61046927SAndroid Build Coastguard Worker 
1216*61046927SAndroid Build Coastguard Worker    ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4);
1217*61046927SAndroid Build Coastguard Worker }
1218*61046927SAndroid Build Coastguard Worker 
1219*61046927SAndroid Build Coastguard Worker /* Run register allocation in a loop, spilling until we succeed */
1220*61046927SAndroid Build Coastguard Worker 
1221*61046927SAndroid Build Coastguard Worker void
mir_ra(compiler_context * ctx)1222*61046927SAndroid Build Coastguard Worker mir_ra(compiler_context *ctx)
1223*61046927SAndroid Build Coastguard Worker {
1224*61046927SAndroid Build Coastguard Worker    struct lcra_state *l = NULL;
1225*61046927SAndroid Build Coastguard Worker    bool spilled = false;
1226*61046927SAndroid Build Coastguard Worker    int iter_count = 1000; /* max iterations */
1227*61046927SAndroid Build Coastguard Worker 
1228*61046927SAndroid Build Coastguard Worker    /* Number of 128-bit slots in memory we've spilled into */
1229*61046927SAndroid Build Coastguard Worker    unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16);
1230*61046927SAndroid Build Coastguard Worker 
1231*61046927SAndroid Build Coastguard Worker    mir_create_pipeline_registers(ctx);
1232*61046927SAndroid Build Coastguard Worker 
1233*61046927SAndroid Build Coastguard Worker    do {
1234*61046927SAndroid Build Coastguard Worker       if (spilled) {
1235*61046927SAndroid Build Coastguard Worker          signed spill_node = mir_choose_spill_node(ctx, l);
1236*61046927SAndroid Build Coastguard Worker          unsigned uniforms = ctx->info->push.count / 4;
1237*61046927SAndroid Build Coastguard Worker 
1238*61046927SAndroid Build Coastguard Worker          /* It's a lot cheaper to demote uniforms to get more
1239*61046927SAndroid Build Coastguard Worker           * work registers than to spill to TLS. */
1240*61046927SAndroid Build Coastguard Worker          if (l->spill_class == REG_CLASS_WORK && uniforms > 8) {
1241*61046927SAndroid Build Coastguard Worker             mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8));
1242*61046927SAndroid Build Coastguard Worker          } else if (spill_node == -1) {
1243*61046927SAndroid Build Coastguard Worker             fprintf(stderr, "ERROR: Failed to choose spill node\n");
1244*61046927SAndroid Build Coastguard Worker             lcra_free(l);
1245*61046927SAndroid Build Coastguard Worker             return;
1246*61046927SAndroid Build Coastguard Worker          } else {
1247*61046927SAndroid Build Coastguard Worker             mir_spill_register(ctx, spill_node, l->spill_class, &spill_count);
1248*61046927SAndroid Build Coastguard Worker          }
1249*61046927SAndroid Build Coastguard Worker       }
1250*61046927SAndroid Build Coastguard Worker 
1251*61046927SAndroid Build Coastguard Worker       mir_squeeze_index(ctx);
1252*61046927SAndroid Build Coastguard Worker       mir_invalidate_liveness(ctx);
1253*61046927SAndroid Build Coastguard Worker 
1254*61046927SAndroid Build Coastguard Worker       if (l) {
1255*61046927SAndroid Build Coastguard Worker          lcra_free(l);
1256*61046927SAndroid Build Coastguard Worker          l = NULL;
1257*61046927SAndroid Build Coastguard Worker       }
1258*61046927SAndroid Build Coastguard Worker 
1259*61046927SAndroid Build Coastguard Worker       l = allocate_registers(ctx, &spilled);
1260*61046927SAndroid Build Coastguard Worker    } while (spilled && ((iter_count--) > 0));
1261*61046927SAndroid Build Coastguard Worker 
1262*61046927SAndroid Build Coastguard Worker    if (iter_count <= 0) {
1263*61046927SAndroid Build Coastguard Worker       fprintf(
1264*61046927SAndroid Build Coastguard Worker          stderr,
1265*61046927SAndroid Build Coastguard Worker          "panfrost: Gave up allocating registers, rendering will be incomplete\n");
1266*61046927SAndroid Build Coastguard Worker       assert(0);
1267*61046927SAndroid Build Coastguard Worker    }
1268*61046927SAndroid Build Coastguard Worker 
1269*61046927SAndroid Build Coastguard Worker    /* Report spilling information. spill_count is in 128-bit slots (vec4 x
1270*61046927SAndroid Build Coastguard Worker     * fp32), but tls_size is in bytes, so multiply by 16 */
1271*61046927SAndroid Build Coastguard Worker 
1272*61046927SAndroid Build Coastguard Worker    ctx->info->tls_size = spill_count * 16;
1273*61046927SAndroid Build Coastguard Worker 
1274*61046927SAndroid Build Coastguard Worker    install_registers(ctx, l);
1275*61046927SAndroid Build Coastguard Worker 
1276*61046927SAndroid Build Coastguard Worker    lcra_free(l);
1277*61046927SAndroid Build Coastguard Worker }
1278