xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/bi_ra.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright (C) 2020 Collabora Ltd.
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker  *
11*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker  * Software.
14*61046927SAndroid Build Coastguard Worker  *
15*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20*61046927SAndroid Build Coastguard Worker  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21*61046927SAndroid Build Coastguard Worker  * SOFTWARE.
22*61046927SAndroid Build Coastguard Worker  *
23*61046927SAndroid Build Coastguard Worker  * Authors (Collabora):
24*61046927SAndroid Build Coastguard Worker  *      Alyssa Rosenzweig <[email protected]>
25*61046927SAndroid Build Coastguard Worker  */
26*61046927SAndroid Build Coastguard Worker 
27*61046927SAndroid Build Coastguard Worker #include "util/u_memory.h"
28*61046927SAndroid Build Coastguard Worker #include "bi_builder.h"
29*61046927SAndroid Build Coastguard Worker #include "compiler.h"
30*61046927SAndroid Build Coastguard Worker #include "nodearray.h"
31*61046927SAndroid Build Coastguard Worker 
/* Per-shader state for linearly constrained register allocation (LCRA).
 * There is one node per SSA value; solving assigns each node a register
 * index subject to the affinity masks and pairwise linear constraints. */
struct lcra_state {
   /* Number of nodes (SSA values) under allocation */
   unsigned node_count;

   /* Per-node bitmask of registers the node may be assigned; bit r set
    * means register r is allowed. A zero affinity marks a node that is
    * not register-allocated and is skipped by the solver. */
   uint64_t *affinity;

   /* Linear constraints imposed. For each node there is a
    * 'nodearray' structure, which changes between a sparse and dense
    * array depending on the number of elements.
    *
    * Each element is itself a bit field denoting whether (c_j - c_i) bias
    * is present or not, including negative biases.
    *
    * We support up to 8 components so the bias is in range
    * [-7, 7] encoded by a 16-bit field
    */
   nodearray *linear;

   /* Before solving, forced registers; after solving, solutions. */
   unsigned *solutions;

   /** Node which caused register allocation to fail */
   unsigned spill_node;
};
54*61046927SAndroid Build Coastguard Worker 
55*61046927SAndroid Build Coastguard Worker /* This module is an implementation of "Linearly Constrained
56*61046927SAndroid Build Coastguard Worker  * Register Allocation". The paper is available in PDF form
57*61046927SAndroid Build Coastguard Worker  * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
58*61046927SAndroid Build Coastguard Worker  * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
59*61046927SAndroid Build Coastguard Worker  */
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker static struct lcra_state *
lcra_alloc_equations(unsigned node_count)62*61046927SAndroid Build Coastguard Worker lcra_alloc_equations(unsigned node_count)
63*61046927SAndroid Build Coastguard Worker {
64*61046927SAndroid Build Coastguard Worker    struct lcra_state *l = calloc(1, sizeof(*l));
65*61046927SAndroid Build Coastguard Worker 
66*61046927SAndroid Build Coastguard Worker    l->node_count = node_count;
67*61046927SAndroid Build Coastguard Worker 
68*61046927SAndroid Build Coastguard Worker    l->linear = calloc(sizeof(l->linear[0]), node_count);
69*61046927SAndroid Build Coastguard Worker    l->solutions = calloc(sizeof(l->solutions[0]), node_count);
70*61046927SAndroid Build Coastguard Worker    l->affinity = calloc(sizeof(l->affinity[0]), node_count);
71*61046927SAndroid Build Coastguard Worker 
72*61046927SAndroid Build Coastguard Worker    memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
73*61046927SAndroid Build Coastguard Worker 
74*61046927SAndroid Build Coastguard Worker    return l;
75*61046927SAndroid Build Coastguard Worker }
76*61046927SAndroid Build Coastguard Worker 
77*61046927SAndroid Build Coastguard Worker static void
lcra_free(struct lcra_state * l)78*61046927SAndroid Build Coastguard Worker lcra_free(struct lcra_state *l)
79*61046927SAndroid Build Coastguard Worker {
80*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < l->node_count; ++i)
81*61046927SAndroid Build Coastguard Worker       nodearray_reset(&l->linear[i]);
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker    free(l->linear);
84*61046927SAndroid Build Coastguard Worker    free(l->affinity);
85*61046927SAndroid Build Coastguard Worker    free(l->solutions);
86*61046927SAndroid Build Coastguard Worker    free(l);
87*61046927SAndroid Build Coastguard Worker }
88*61046927SAndroid Build Coastguard Worker 
/* Record that nodes i and j interfere, given their live byte masks cmask_i
 * and cmask_j. For every alignment difference D in [0, 7] where the shifted
 * masks would overlap, forbid that difference between the two nodes'
 * solutions, in both directions. Each constraint word uses bit (d + 7) to
 * forbid a solution difference of d, for d in [-7, 7]. */
static void
lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i,
                           unsigned j, unsigned cmask_j)
{
   /* A node never interferes with itself */
   if (i == j)
      return;

   nodearray_value constraint_fw = 0;
   nodearray_value constraint_bw = 0;

   /* The constraint bits are reversed from lcra.c so that register
    * allocation can be done in parallel for every possible solution,
    * with lower-order bits representing smaller registers. */

   for (unsigned D = 0; D < 8; ++D) {
      /* Overlap when j's mask is shifted up by D: forbid a forward
       * difference of +D, which is a backward difference of -D */
      if (cmask_i & (cmask_j << D)) {
         constraint_fw |= (1 << (7 + D));
         constraint_bw |= (1 << (7 - D));
      }

      /* Symmetric case: overlap when j's mask is shifted down by D */
      if (cmask_i & (cmask_j >> D)) {
         constraint_bw |= (1 << (7 + D));
         constraint_fw |= (1 << (7 - D));
      }
   }

   /* Use dense arrays after adding 256 elements */
   nodearray_orr(&l->linear[j], i, constraint_fw, 256, l->node_count);
   nodearray_orr(&l->linear[i], j, constraint_bw, 256, l->node_count);
}
119*61046927SAndroid Build Coastguard Worker 
120*61046927SAndroid Build Coastguard Worker static bool
lcra_test_linear(struct lcra_state * l,unsigned * solutions,unsigned i)121*61046927SAndroid Build Coastguard Worker lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
122*61046927SAndroid Build Coastguard Worker {
123*61046927SAndroid Build Coastguard Worker    signed constant = solutions[i];
124*61046927SAndroid Build Coastguard Worker 
125*61046927SAndroid Build Coastguard Worker    if (nodearray_is_sparse(&l->linear[i])) {
126*61046927SAndroid Build Coastguard Worker       nodearray_sparse_foreach(&l->linear[i], elem) {
127*61046927SAndroid Build Coastguard Worker          unsigned j = nodearray_sparse_key(elem);
128*61046927SAndroid Build Coastguard Worker          nodearray_value constraint = nodearray_sparse_value(elem);
129*61046927SAndroid Build Coastguard Worker 
130*61046927SAndroid Build Coastguard Worker          if (solutions[j] == ~0)
131*61046927SAndroid Build Coastguard Worker             continue;
132*61046927SAndroid Build Coastguard Worker 
133*61046927SAndroid Build Coastguard Worker          signed lhs = constant - solutions[j];
134*61046927SAndroid Build Coastguard Worker 
135*61046927SAndroid Build Coastguard Worker          if (lhs < -7 || lhs > 7)
136*61046927SAndroid Build Coastguard Worker             continue;
137*61046927SAndroid Build Coastguard Worker 
138*61046927SAndroid Build Coastguard Worker          if (constraint & (1 << (lhs + 7)))
139*61046927SAndroid Build Coastguard Worker             return false;
140*61046927SAndroid Build Coastguard Worker       }
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker       return true;
143*61046927SAndroid Build Coastguard Worker    }
144*61046927SAndroid Build Coastguard Worker 
145*61046927SAndroid Build Coastguard Worker    nodearray_value *row = l->linear[i].dense;
146*61046927SAndroid Build Coastguard Worker 
147*61046927SAndroid Build Coastguard Worker    for (unsigned j = 0; j < l->node_count; ++j) {
148*61046927SAndroid Build Coastguard Worker       if (solutions[j] == ~0)
149*61046927SAndroid Build Coastguard Worker          continue;
150*61046927SAndroid Build Coastguard Worker 
151*61046927SAndroid Build Coastguard Worker       signed lhs = constant - solutions[j];
152*61046927SAndroid Build Coastguard Worker 
153*61046927SAndroid Build Coastguard Worker       if (lhs < -7 || lhs > 7)
154*61046927SAndroid Build Coastguard Worker          continue;
155*61046927SAndroid Build Coastguard Worker 
156*61046927SAndroid Build Coastguard Worker       if (row[j] & (1 << (lhs + 7)))
157*61046927SAndroid Build Coastguard Worker          return false;
158*61046927SAndroid Build Coastguard Worker    }
159*61046927SAndroid Build Coastguard Worker 
160*61046927SAndroid Build Coastguard Worker    return true;
161*61046927SAndroid Build Coastguard Worker }
162*61046927SAndroid Build Coastguard Worker 
163*61046927SAndroid Build Coastguard Worker static bool
lcra_solve(struct lcra_state * l)164*61046927SAndroid Build Coastguard Worker lcra_solve(struct lcra_state *l)
165*61046927SAndroid Build Coastguard Worker {
166*61046927SAndroid Build Coastguard Worker    for (unsigned step = 0; step < l->node_count; ++step) {
167*61046927SAndroid Build Coastguard Worker       if (l->solutions[step] != ~0)
168*61046927SAndroid Build Coastguard Worker          continue;
169*61046927SAndroid Build Coastguard Worker       if (l->affinity[step] == 0)
170*61046927SAndroid Build Coastguard Worker          continue;
171*61046927SAndroid Build Coastguard Worker 
172*61046927SAndroid Build Coastguard Worker       bool succ = false;
173*61046927SAndroid Build Coastguard Worker 
174*61046927SAndroid Build Coastguard Worker       u_foreach_bit64(r, l->affinity[step]) {
175*61046927SAndroid Build Coastguard Worker          l->solutions[step] = r;
176*61046927SAndroid Build Coastguard Worker 
177*61046927SAndroid Build Coastguard Worker          if (lcra_test_linear(l, l->solutions, step)) {
178*61046927SAndroid Build Coastguard Worker             succ = true;
179*61046927SAndroid Build Coastguard Worker             break;
180*61046927SAndroid Build Coastguard Worker          }
181*61046927SAndroid Build Coastguard Worker       }
182*61046927SAndroid Build Coastguard Worker 
183*61046927SAndroid Build Coastguard Worker       /* Out of registers - prepare to spill */
184*61046927SAndroid Build Coastguard Worker       if (!succ) {
185*61046927SAndroid Build Coastguard Worker          l->spill_node = step;
186*61046927SAndroid Build Coastguard Worker          return false;
187*61046927SAndroid Build Coastguard Worker       }
188*61046927SAndroid Build Coastguard Worker    }
189*61046927SAndroid Build Coastguard Worker 
190*61046927SAndroid Build Coastguard Worker    return true;
191*61046927SAndroid Build Coastguard Worker }
192*61046927SAndroid Build Coastguard Worker 
193*61046927SAndroid Build Coastguard Worker /* Register spilling is implemented with a cost-benefit system. Costs are set
194*61046927SAndroid Build Coastguard Worker  * by the user. Benefits are calculated from the constraints. */
195*61046927SAndroid Build Coastguard Worker 
196*61046927SAndroid Build Coastguard Worker static unsigned
lcra_count_constraints(struct lcra_state * l,unsigned i)197*61046927SAndroid Build Coastguard Worker lcra_count_constraints(struct lcra_state *l, unsigned i)
198*61046927SAndroid Build Coastguard Worker {
199*61046927SAndroid Build Coastguard Worker    unsigned count = 0;
200*61046927SAndroid Build Coastguard Worker    nodearray *constraints = &l->linear[i];
201*61046927SAndroid Build Coastguard Worker 
202*61046927SAndroid Build Coastguard Worker    if (nodearray_is_sparse(constraints)) {
203*61046927SAndroid Build Coastguard Worker       nodearray_sparse_foreach(constraints, elem)
204*61046927SAndroid Build Coastguard Worker          count += util_bitcount(nodearray_sparse_value(elem));
205*61046927SAndroid Build Coastguard Worker    } else {
206*61046927SAndroid Build Coastguard Worker       nodearray_dense_foreach_64(constraints, elem)
207*61046927SAndroid Build Coastguard Worker          count += util_bitcount64(*elem);
208*61046927SAndroid Build Coastguard Worker    }
209*61046927SAndroid Build Coastguard Worker 
210*61046927SAndroid Build Coastguard Worker    return count;
211*61046927SAndroid Build Coastguard Worker }
212*61046927SAndroid Build Coastguard Worker 
213*61046927SAndroid Build Coastguard Worker /* Liveness analysis is a backwards-may dataflow analysis pass. Within a block,
214*61046927SAndroid Build Coastguard Worker  * we compute live_out from live_in. The intrablock pass is linear-time. It
215*61046927SAndroid Build Coastguard Worker  * returns whether progress was made. */
216*61046927SAndroid Build Coastguard Worker 
217*61046927SAndroid Build Coastguard Worker static void
bi_liveness_ins_update_ra(uint8_t * live,bi_instr * ins)218*61046927SAndroid Build Coastguard Worker bi_liveness_ins_update_ra(uint8_t *live, bi_instr *ins)
219*61046927SAndroid Build Coastguard Worker {
220*61046927SAndroid Build Coastguard Worker    /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */
221*61046927SAndroid Build Coastguard Worker 
222*61046927SAndroid Build Coastguard Worker    bi_foreach_dest(ins, d) {
223*61046927SAndroid Build Coastguard Worker       live[ins->dest[d].value] &= ~bi_writemask(ins, d);
224*61046927SAndroid Build Coastguard Worker    }
225*61046927SAndroid Build Coastguard Worker 
226*61046927SAndroid Build Coastguard Worker    bi_foreach_ssa_src(ins, src) {
227*61046927SAndroid Build Coastguard Worker       unsigned count = bi_count_read_registers(ins, src);
228*61046927SAndroid Build Coastguard Worker       unsigned rmask = BITFIELD_MASK(count);
229*61046927SAndroid Build Coastguard Worker 
230*61046927SAndroid Build Coastguard Worker       live[ins->src[src].value] |= (rmask << ins->src[src].offset);
231*61046927SAndroid Build Coastguard Worker    }
232*61046927SAndroid Build Coastguard Worker }
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker static bool
liveness_block_update(bi_block * blk,unsigned temp_count)235*61046927SAndroid Build Coastguard Worker liveness_block_update(bi_block *blk, unsigned temp_count)
236*61046927SAndroid Build Coastguard Worker {
237*61046927SAndroid Build Coastguard Worker    bool progress = false;
238*61046927SAndroid Build Coastguard Worker 
239*61046927SAndroid Build Coastguard Worker    /* live_out[s] = sum { p in succ[s] } ( live_in[p] ) */
240*61046927SAndroid Build Coastguard Worker    bi_foreach_successor(blk, succ) {
241*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < temp_count; ++i)
242*61046927SAndroid Build Coastguard Worker          blk->live_out[i] |= succ->live_in[i];
243*61046927SAndroid Build Coastguard Worker    }
244*61046927SAndroid Build Coastguard Worker 
245*61046927SAndroid Build Coastguard Worker    uint8_t *live = ralloc_array(blk, uint8_t, temp_count);
246*61046927SAndroid Build Coastguard Worker    memcpy(live, blk->live_out, temp_count);
247*61046927SAndroid Build Coastguard Worker 
248*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_in_block_rev(blk, ins)
249*61046927SAndroid Build Coastguard Worker       bi_liveness_ins_update_ra(live, ins);
250*61046927SAndroid Build Coastguard Worker 
251*61046927SAndroid Build Coastguard Worker    /* To figure out progress, diff live_in */
252*61046927SAndroid Build Coastguard Worker 
253*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; (i < temp_count) && !progress; ++i)
254*61046927SAndroid Build Coastguard Worker       progress |= (blk->live_in[i] != live[i]);
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker    ralloc_free(blk->live_in);
257*61046927SAndroid Build Coastguard Worker    blk->live_in = live;
258*61046927SAndroid Build Coastguard Worker 
259*61046927SAndroid Build Coastguard Worker    return progress;
260*61046927SAndroid Build Coastguard Worker }
261*61046927SAndroid Build Coastguard Worker 
262*61046927SAndroid Build Coastguard Worker /* Globally, liveness analysis uses a fixed-point algorithm based on a
263*61046927SAndroid Build Coastguard Worker  * worklist. We initialize a work list with the exit block. We iterate the work
264*61046927SAndroid Build Coastguard Worker  * list to compute live_in from live_out for each block on the work list,
265*61046927SAndroid Build Coastguard Worker  * adding the predecessors of the block to the work list if we made progress.
266*61046927SAndroid Build Coastguard Worker  */
267*61046927SAndroid Build Coastguard Worker 
268*61046927SAndroid Build Coastguard Worker static void
bi_compute_liveness_ra(bi_context * ctx)269*61046927SAndroid Build Coastguard Worker bi_compute_liveness_ra(bi_context *ctx)
270*61046927SAndroid Build Coastguard Worker {
271*61046927SAndroid Build Coastguard Worker    u_worklist worklist;
272*61046927SAndroid Build Coastguard Worker    bi_worklist_init(ctx, &worklist);
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker    bi_foreach_block(ctx, block) {
275*61046927SAndroid Build Coastguard Worker       if (block->live_in)
276*61046927SAndroid Build Coastguard Worker          ralloc_free(block->live_in);
277*61046927SAndroid Build Coastguard Worker 
278*61046927SAndroid Build Coastguard Worker       if (block->live_out)
279*61046927SAndroid Build Coastguard Worker          ralloc_free(block->live_out);
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker       block->live_in = rzalloc_array(block, uint8_t, ctx->ssa_alloc);
282*61046927SAndroid Build Coastguard Worker       block->live_out = rzalloc_array(block, uint8_t, ctx->ssa_alloc);
283*61046927SAndroid Build Coastguard Worker 
284*61046927SAndroid Build Coastguard Worker       bi_worklist_push_tail(&worklist, block);
285*61046927SAndroid Build Coastguard Worker    }
286*61046927SAndroid Build Coastguard Worker 
287*61046927SAndroid Build Coastguard Worker    while (!u_worklist_is_empty(&worklist)) {
288*61046927SAndroid Build Coastguard Worker       /* Pop off in reverse order since liveness is backwards */
289*61046927SAndroid Build Coastguard Worker       bi_block *blk = bi_worklist_pop_tail(&worklist);
290*61046927SAndroid Build Coastguard Worker 
291*61046927SAndroid Build Coastguard Worker       /* Update liveness information. If we made progress, we need to
292*61046927SAndroid Build Coastguard Worker        * reprocess the predecessors
293*61046927SAndroid Build Coastguard Worker        */
294*61046927SAndroid Build Coastguard Worker       if (liveness_block_update(blk, ctx->ssa_alloc)) {
295*61046927SAndroid Build Coastguard Worker          bi_foreach_predecessor(blk, pred)
296*61046927SAndroid Build Coastguard Worker             bi_worklist_push_head(&worklist, *pred);
297*61046927SAndroid Build Coastguard Worker       }
298*61046927SAndroid Build Coastguard Worker    }
299*61046927SAndroid Build Coastguard Worker 
300*61046927SAndroid Build Coastguard Worker    u_worklist_fini(&worklist);
301*61046927SAndroid Build Coastguard Worker }
302*61046927SAndroid Build Coastguard Worker 
303*61046927SAndroid Build Coastguard Worker /* Construct an affinity mask such that the vector with `count` elements does
304*61046927SAndroid Build Coastguard Worker  * not intersect any of the registers in the bitset `clobber`. In other words,
305*61046927SAndroid Build Coastguard Worker  * an allocated register r needs to satisfy for each i < count: a + i != b.
306*61046927SAndroid Build Coastguard Worker  * Equivalently that's a != b - i, so we need a \ne { b - i : i < n }. For the
307*61046927SAndroid Build Coastguard Worker  * entire clobber set B, we need a \ne union b \in B { b - i : i < n }, where
308*61046927SAndroid Build Coastguard Worker  * that union is the desired clobber set. That may be written equivalently as
309*61046927SAndroid Build Coastguard Worker  * the union over i < n of (B - i), where subtraction is defined elementwise
310*61046927SAndroid Build Coastguard Worker  * and corresponds to a shift of the entire bitset.
311*61046927SAndroid Build Coastguard Worker  *
312*61046927SAndroid Build Coastguard Worker  * EVEN_BITS_MASK is an affinity mask for aligned register pairs. Interpreted
313*61046927SAndroid Build Coastguard Worker  * as a bit set, it is { x : 0 <= x < 64 if x is even }
314*61046927SAndroid Build Coastguard Worker  */
315*61046927SAndroid Build Coastguard Worker 
316*61046927SAndroid Build Coastguard Worker #define EVEN_BITS_MASK (0x5555555555555555ull)
317*61046927SAndroid Build Coastguard Worker 
/* Build the affinity mask for a `count`-register vector that must not
 * intersect any register in the `clobber` bitset (see the derivation in the
 * comment above: the forbidden set is the union over i < count of
 * clobber >> i). With split_file, the register file is restricted to
 * r0-r15 / r48-r63 and the middle is reserved. */
static uint64_t
bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
{
   uint64_t forbidden = 0;

   /* Forbid any base whose span [base, base + count) touches a clobber */
   for (unsigned i = 0; i < count; ++i)
      forbidden |= clobber >> i;

   if (count > 1) {
      /* Forbid bases whose vector would run off the end of the file
       * (and, when split, off the end of the lower half) */
      unsigned overhang = count - 1;
      uint64_t mask = BITFIELD_MASK(overhang);

      forbidden |= mask << (64 - overhang);

      if (split_file)
         forbidden |= mask << (16 - overhang);
   }

   /* Don't allocate the middle if we split out the middle */
   if (split_file)
      forbidden |= BITFIELD64_MASK(32) << 16;

   /* A register is usable iff it is not forbidden */
   return ~forbidden;
}
343*61046927SAndroid Build Coastguard Worker 
/* Walk one block bottom-up, accumulating interference constraints and
 * shrinking per-node affinities in `l`. `live` is the block's SSA live-out
 * byte-mask array (mutated in place as we walk); `preload_live` is the
 * post-RA (preloaded register) live-out set, rewound to live-in and stored
 * in block->reg_live_in on exit. `split_file` restricts allocation to the
 * split register file; `aligned_sr` enables Valhall's pair-alignment rules. */
static void
bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
                     uint64_t preload_live, unsigned node_count, bool is_blend,
                     bool split_file, bool aligned_sr)
{
   bi_foreach_instr_in_block_rev(block, ins) {
      /* Mark all registers live after the instruction as
       * interfering with the destination */

      bi_foreach_dest(ins, d) {
         unsigned node = ins->dest[d].value;

         /* Don't allocate to anything that's read later as a
          * preloaded register. The affinity is the intersection
          * of affinity masks for each write. Since writes have
          * offsets, but the affinity is for the whole node, we
          * need to offset the affinity opposite the write
          * offset, so we shift right. */
         unsigned count = bi_count_write_registers(ins, d);
         unsigned offset = ins->dest[d].offset;
         uint64_t affinity =
            bi_make_affinity(preload_live, count, split_file) >> offset;
         /* Valhall needs >= 64-bit staging writes to be pair-aligned */
         if (aligned_sr && (count >= 2 || offset))
            affinity &= EVEN_BITS_MASK;

         l->affinity[node] &= affinity;

         for (unsigned i = 0; i < node_count; ++i) {
            uint8_t r = live[i];

            /* Nodes only interfere if they occupy
             * /different values/ at the same time
             * (Boissinot). In particular, sources of
             * moves do not interfere with their
             * destinations. This enables a limited form of
             * coalescing.
             */
            if (ins->op == BI_OPCODE_MOV_I32 && bi_is_ssa(ins->src[0]) &&
                i == ins->src[0].value) {

               r &= ~BITFIELD_BIT(ins->src[0].offset);
            }

            if (r) {
               lcra_add_node_interference(l, node, bi_writemask(ins, d), i, r);
            }
         }

         /* Multiple destinations of the same instruction are written
          * simultaneously, so they interfere with each other even
          * though neither is live yet at this point in the walk */
         unsigned node_first = ins->dest[0].value;
         if (d == 1) {
            lcra_add_node_interference(l, node, bi_writemask(ins, 1),
                                       node_first, bi_writemask(ins, 0));
         }
      }

      /* Valhall needs >= 64-bit reads to be pair-aligned */
      if (aligned_sr) {
         bi_foreach_ssa_src(ins, s) {
            if (bi_count_read_registers(ins, s) >= 2)
               l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
         }
      }

      /* When this shader is not itself a blend shader, a BLEND call may
       * invoke one, so anything live across it must avoid the registers
       * a blend shader might clobber */
      if (!is_blend && ins->op == BI_OPCODE_BLEND) {
         /* Blend shaders might clobber r0-r15, r48. */
         uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48);

         for (unsigned i = 0; i < node_count; ++i) {
            if (live[i])
               l->affinity[i] &= ~clobber;
         }
      }

      /* Update live_in */
      preload_live = bi_postra_liveness_ins(preload_live, ins);
      bi_liveness_ins_update_ra(live, ins);
   }

   block->reg_live_in = preload_live;
}
425*61046927SAndroid Build Coastguard Worker 
426*61046927SAndroid Build Coastguard Worker static void
bi_compute_interference(bi_context * ctx,struct lcra_state * l,bool full_regs)427*61046927SAndroid Build Coastguard Worker bi_compute_interference(bi_context *ctx, struct lcra_state *l, bool full_regs)
428*61046927SAndroid Build Coastguard Worker {
429*61046927SAndroid Build Coastguard Worker    bi_compute_liveness_ra(ctx);
430*61046927SAndroid Build Coastguard Worker    bi_postra_liveness(ctx);
431*61046927SAndroid Build Coastguard Worker 
432*61046927SAndroid Build Coastguard Worker    bi_foreach_block_rev(ctx, blk) {
433*61046927SAndroid Build Coastguard Worker       uint8_t *live = mem_dup(blk->live_out, ctx->ssa_alloc);
434*61046927SAndroid Build Coastguard Worker 
435*61046927SAndroid Build Coastguard Worker       bi_mark_interference(blk, l, live, blk->reg_live_out, ctx->ssa_alloc,
436*61046927SAndroid Build Coastguard Worker                            ctx->inputs->is_blend, !full_regs, ctx->arch >= 9);
437*61046927SAndroid Build Coastguard Worker 
438*61046927SAndroid Build Coastguard Worker       free(live);
439*61046927SAndroid Build Coastguard Worker    }
440*61046927SAndroid Build Coastguard Worker }
441*61046927SAndroid Build Coastguard Worker 
442*61046927SAndroid Build Coastguard Worker static struct lcra_state *
bi_allocate_registers(bi_context * ctx,bool * success,bool full_regs)443*61046927SAndroid Build Coastguard Worker bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
444*61046927SAndroid Build Coastguard Worker {
445*61046927SAndroid Build Coastguard Worker    struct lcra_state *l = lcra_alloc_equations(ctx->ssa_alloc);
446*61046927SAndroid Build Coastguard Worker 
447*61046927SAndroid Build Coastguard Worker    /* Blend shaders are restricted to R0-R15. Other shaders at full
448*61046927SAndroid Build Coastguard Worker     * occupancy also can access R48-R63. At half occupancy they can access
449*61046927SAndroid Build Coastguard Worker     * the whole file. */
450*61046927SAndroid Build Coastguard Worker 
451*61046927SAndroid Build Coastguard Worker    uint64_t default_affinity =
452*61046927SAndroid Build Coastguard Worker       ctx->inputs->is_blend ? BITFIELD64_MASK(16)
453*61046927SAndroid Build Coastguard Worker       : full_regs           ? BITFIELD64_MASK(64)
454*61046927SAndroid Build Coastguard Worker                   : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48));
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker    /* To test spilling, mimic a small register file */
457*61046927SAndroid Build Coastguard Worker    if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend)
458*61046927SAndroid Build Coastguard Worker       default_affinity &= BITFIELD64_MASK(48) << 8;
459*61046927SAndroid Build Coastguard Worker 
460*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_global(ctx, ins) {
461*61046927SAndroid Build Coastguard Worker       bi_foreach_dest(ins, d)
462*61046927SAndroid Build Coastguard Worker          l->affinity[ins->dest[d].value] = default_affinity;
463*61046927SAndroid Build Coastguard Worker 
464*61046927SAndroid Build Coastguard Worker       /* Blend shaders expect the src colour to be in r0-r3 */
465*61046927SAndroid Build Coastguard Worker       if (ins->op == BI_OPCODE_BLEND && !ctx->inputs->is_blend) {
466*61046927SAndroid Build Coastguard Worker          assert(bi_is_ssa(ins->src[0]));
467*61046927SAndroid Build Coastguard Worker          l->solutions[ins->src[0].value] = 0;
468*61046927SAndroid Build Coastguard Worker 
469*61046927SAndroid Build Coastguard Worker          /* Dual source blend input in r4-r7 */
470*61046927SAndroid Build Coastguard Worker          if (bi_is_ssa(ins->src[4]))
471*61046927SAndroid Build Coastguard Worker             l->solutions[ins->src[4].value] = 4;
472*61046927SAndroid Build Coastguard Worker 
473*61046927SAndroid Build Coastguard Worker          /* Writes to R48 */
474*61046927SAndroid Build Coastguard Worker          if (!bi_is_null(ins->dest[0]))
475*61046927SAndroid Build Coastguard Worker             l->solutions[ins->dest[0].value] = 48;
476*61046927SAndroid Build Coastguard Worker       }
477*61046927SAndroid Build Coastguard Worker 
478*61046927SAndroid Build Coastguard Worker       /* Coverage mask writes stay in R60 */
479*61046927SAndroid Build Coastguard Worker       if ((ins->op == BI_OPCODE_ATEST || ins->op == BI_OPCODE_ZS_EMIT) &&
480*61046927SAndroid Build Coastguard Worker           !bi_is_null(ins->dest[0])) {
481*61046927SAndroid Build Coastguard Worker          l->solutions[ins->dest[0].value] = 60;
482*61046927SAndroid Build Coastguard Worker       }
483*61046927SAndroid Build Coastguard Worker 
484*61046927SAndroid Build Coastguard Worker       /* Experimentally, it seems coverage masks inputs to ATEST must
485*61046927SAndroid Build Coastguard Worker        * be in R60. Otherwise coverage mask writes do not work with
486*61046927SAndroid Build Coastguard Worker        * early-ZS with pixel-frequency-shading (this combination of
487*61046927SAndroid Build Coastguard Worker        * settings is legal if depth/stencil writes are disabled).
488*61046927SAndroid Build Coastguard Worker        */
489*61046927SAndroid Build Coastguard Worker       if (ins->op == BI_OPCODE_ATEST) {
490*61046927SAndroid Build Coastguard Worker          assert(bi_is_ssa(ins->src[0]));
491*61046927SAndroid Build Coastguard Worker          l->solutions[ins->src[0].value] = 60;
492*61046927SAndroid Build Coastguard Worker       }
493*61046927SAndroid Build Coastguard Worker    }
494*61046927SAndroid Build Coastguard Worker 
495*61046927SAndroid Build Coastguard Worker    bi_compute_interference(ctx, l, full_regs);
496*61046927SAndroid Build Coastguard Worker 
497*61046927SAndroid Build Coastguard Worker    /* Coalesce register moves if we're allowed. We need to be careful due
498*61046927SAndroid Build Coastguard Worker     * to the restricted affinity induced by the blend shader ABI.
499*61046927SAndroid Build Coastguard Worker     */
500*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_global(ctx, I) {
501*61046927SAndroid Build Coastguard Worker       if (I->op != BI_OPCODE_MOV_I32)
502*61046927SAndroid Build Coastguard Worker          continue;
503*61046927SAndroid Build Coastguard Worker       if (I->src[0].type != BI_INDEX_REGISTER)
504*61046927SAndroid Build Coastguard Worker          continue;
505*61046927SAndroid Build Coastguard Worker 
506*61046927SAndroid Build Coastguard Worker       unsigned reg = I->src[0].value;
507*61046927SAndroid Build Coastguard Worker       unsigned node = I->dest[0].value;
508*61046927SAndroid Build Coastguard Worker 
509*61046927SAndroid Build Coastguard Worker       if (l->solutions[node] != ~0)
510*61046927SAndroid Build Coastguard Worker          continue;
511*61046927SAndroid Build Coastguard Worker 
512*61046927SAndroid Build Coastguard Worker       uint64_t affinity = l->affinity[node];
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker       if (ctx->inputs->is_blend) {
515*61046927SAndroid Build Coastguard Worker          /* We're allowed to coalesce the moves to these */
516*61046927SAndroid Build Coastguard Worker          affinity |= BITFIELD64_BIT(48);
517*61046927SAndroid Build Coastguard Worker          affinity |= BITFIELD64_BIT(60);
518*61046927SAndroid Build Coastguard Worker       }
519*61046927SAndroid Build Coastguard Worker 
520*61046927SAndroid Build Coastguard Worker       /* Try to coalesce */
521*61046927SAndroid Build Coastguard Worker       if (affinity & BITFIELD64_BIT(reg)) {
522*61046927SAndroid Build Coastguard Worker          l->solutions[node] = reg;
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker          if (!lcra_test_linear(l, l->solutions, node))
525*61046927SAndroid Build Coastguard Worker             l->solutions[node] = ~0;
526*61046927SAndroid Build Coastguard Worker       }
527*61046927SAndroid Build Coastguard Worker    }
528*61046927SAndroid Build Coastguard Worker 
529*61046927SAndroid Build Coastguard Worker    *success = lcra_solve(l);
530*61046927SAndroid Build Coastguard Worker 
531*61046927SAndroid Build Coastguard Worker    return l;
532*61046927SAndroid Build Coastguard Worker }
533*61046927SAndroid Build Coastguard Worker 
534*61046927SAndroid Build Coastguard Worker static bi_index
bi_reg_from_index(bi_context * ctx,struct lcra_state * l,bi_index index)535*61046927SAndroid Build Coastguard Worker bi_reg_from_index(bi_context *ctx, struct lcra_state *l, bi_index index)
536*61046927SAndroid Build Coastguard Worker {
537*61046927SAndroid Build Coastguard Worker    /* Offsets can only be applied when we register allocated an index, or
538*61046927SAndroid Build Coastguard Worker     * alternatively for FAU's encoding */
539*61046927SAndroid Build Coastguard Worker 
540*61046927SAndroid Build Coastguard Worker    ASSERTED bool is_offset = (index.offset > 0) && (index.type != BI_INDEX_FAU);
541*61046927SAndroid Build Coastguard Worker 
542*61046927SAndroid Build Coastguard Worker    /* Did we run RA for this index at all */
543*61046927SAndroid Build Coastguard Worker    if (!bi_is_ssa(index)) {
544*61046927SAndroid Build Coastguard Worker       assert(!is_offset);
545*61046927SAndroid Build Coastguard Worker       return index;
546*61046927SAndroid Build Coastguard Worker    }
547*61046927SAndroid Build Coastguard Worker 
548*61046927SAndroid Build Coastguard Worker    /* LCRA didn't bother solving this index (how lazy!) */
549*61046927SAndroid Build Coastguard Worker    signed solution = l->solutions[index.value];
550*61046927SAndroid Build Coastguard Worker    if (solution < 0) {
551*61046927SAndroid Build Coastguard Worker       assert(!is_offset);
552*61046927SAndroid Build Coastguard Worker       return index;
553*61046927SAndroid Build Coastguard Worker    }
554*61046927SAndroid Build Coastguard Worker 
555*61046927SAndroid Build Coastguard Worker    /* todo: do we want to compose with the subword swizzle? */
556*61046927SAndroid Build Coastguard Worker    bi_index new_index = bi_register(solution + index.offset);
557*61046927SAndroid Build Coastguard Worker    new_index.swizzle = index.swizzle;
558*61046927SAndroid Build Coastguard Worker    new_index.abs = index.abs;
559*61046927SAndroid Build Coastguard Worker    new_index.neg = index.neg;
560*61046927SAndroid Build Coastguard Worker    return new_index;
561*61046927SAndroid Build Coastguard Worker }
562*61046927SAndroid Build Coastguard Worker 
563*61046927SAndroid Build Coastguard Worker /* Dual texture instructions write to two sets of staging registers, modeled as
564*61046927SAndroid Build Coastguard Worker  * two destinations in the IR. The first set is communicated with the usual
565*61046927SAndroid Build Coastguard Worker  * staging register mechanism. The second set is encoded in the texture
566*61046927SAndroid Build Coastguard Worker  * operation descriptor. This is quite unusual, and requires the following late
567*61046927SAndroid Build Coastguard Worker  * fixup.
568*61046927SAndroid Build Coastguard Worker  */
569*61046927SAndroid Build Coastguard Worker static void
bi_fixup_dual_tex_register(bi_instr * I)570*61046927SAndroid Build Coastguard Worker bi_fixup_dual_tex_register(bi_instr *I)
571*61046927SAndroid Build Coastguard Worker {
572*61046927SAndroid Build Coastguard Worker    assert(I->dest[1].type == BI_INDEX_REGISTER);
573*61046927SAndroid Build Coastguard Worker    assert(I->src[3].type == BI_INDEX_CONSTANT);
574*61046927SAndroid Build Coastguard Worker 
575*61046927SAndroid Build Coastguard Worker    struct bifrost_dual_texture_operation desc = {
576*61046927SAndroid Build Coastguard Worker       .secondary_register = I->dest[1].value,
577*61046927SAndroid Build Coastguard Worker    };
578*61046927SAndroid Build Coastguard Worker 
579*61046927SAndroid Build Coastguard Worker    I->src[3].value |= bi_dual_tex_as_u32(desc);
580*61046927SAndroid Build Coastguard Worker }
581*61046927SAndroid Build Coastguard Worker 
582*61046927SAndroid Build Coastguard Worker static void
bi_install_registers(bi_context * ctx,struct lcra_state * l)583*61046927SAndroid Build Coastguard Worker bi_install_registers(bi_context *ctx, struct lcra_state *l)
584*61046927SAndroid Build Coastguard Worker {
585*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_global(ctx, ins) {
586*61046927SAndroid Build Coastguard Worker       bi_foreach_dest(ins, d)
587*61046927SAndroid Build Coastguard Worker          ins->dest[d] = bi_reg_from_index(ctx, l, ins->dest[d]);
588*61046927SAndroid Build Coastguard Worker 
589*61046927SAndroid Build Coastguard Worker       bi_foreach_src(ins, s)
590*61046927SAndroid Build Coastguard Worker          ins->src[s] = bi_reg_from_index(ctx, l, ins->src[s]);
591*61046927SAndroid Build Coastguard Worker 
592*61046927SAndroid Build Coastguard Worker       if (ins->op == BI_OPCODE_TEXC_DUAL)
593*61046927SAndroid Build Coastguard Worker          bi_fixup_dual_tex_register(ins);
594*61046927SAndroid Build Coastguard Worker    }
595*61046927SAndroid Build Coastguard Worker }
596*61046927SAndroid Build Coastguard Worker 
597*61046927SAndroid Build Coastguard Worker static void
bi_rewrite_index_src_single(bi_instr * ins,bi_index old,bi_index new)598*61046927SAndroid Build Coastguard Worker bi_rewrite_index_src_single(bi_instr *ins, bi_index old, bi_index new)
599*61046927SAndroid Build Coastguard Worker {
600*61046927SAndroid Build Coastguard Worker    bi_foreach_src(ins, i) {
601*61046927SAndroid Build Coastguard Worker       if (bi_is_equiv(ins->src[i], old)) {
602*61046927SAndroid Build Coastguard Worker          ins->src[i].type = new.type;
603*61046927SAndroid Build Coastguard Worker          ins->src[i].value = new.value;
604*61046927SAndroid Build Coastguard Worker       }
605*61046927SAndroid Build Coastguard Worker    }
606*61046927SAndroid Build Coastguard Worker }
607*61046927SAndroid Build Coastguard Worker 
608*61046927SAndroid Build Coastguard Worker /* If register allocation fails, find the best spill node */
609*61046927SAndroid Build Coastguard Worker 
610*61046927SAndroid Build Coastguard Worker static signed
bi_choose_spill_node(bi_context * ctx,struct lcra_state * l)611*61046927SAndroid Build Coastguard Worker bi_choose_spill_node(bi_context *ctx, struct lcra_state *l)
612*61046927SAndroid Build Coastguard Worker {
613*61046927SAndroid Build Coastguard Worker    /* Pick a node satisfying bi_spill_register's preconditions */
614*61046927SAndroid Build Coastguard Worker    BITSET_WORD *no_spill =
615*61046927SAndroid Build Coastguard Worker       calloc(sizeof(BITSET_WORD), BITSET_WORDS(l->node_count));
616*61046927SAndroid Build Coastguard Worker 
617*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_global(ctx, ins) {
618*61046927SAndroid Build Coastguard Worker       bi_foreach_dest(ins, d) {
619*61046927SAndroid Build Coastguard Worker          /* Don't allow spilling coverage mask writes because the
620*61046927SAndroid Build Coastguard Worker           * register preload logic assumes it will stay in R60.
621*61046927SAndroid Build Coastguard Worker           * This could be optimized.
622*61046927SAndroid Build Coastguard Worker           */
623*61046927SAndroid Build Coastguard Worker          if (ins->no_spill || ins->op == BI_OPCODE_ATEST ||
624*61046927SAndroid Build Coastguard Worker              ins->op == BI_OPCODE_ZS_EMIT ||
625*61046927SAndroid Build Coastguard Worker              (ins->op == BI_OPCODE_MOV_I32 &&
626*61046927SAndroid Build Coastguard Worker               ins->src[0].type == BI_INDEX_REGISTER &&
627*61046927SAndroid Build Coastguard Worker               ins->src[0].value == 60)) {
628*61046927SAndroid Build Coastguard Worker             BITSET_SET(no_spill, ins->dest[d].value);
629*61046927SAndroid Build Coastguard Worker          }
630*61046927SAndroid Build Coastguard Worker       }
631*61046927SAndroid Build Coastguard Worker    }
632*61046927SAndroid Build Coastguard Worker 
633*61046927SAndroid Build Coastguard Worker    unsigned best_benefit = 0.0;
634*61046927SAndroid Build Coastguard Worker    signed best_node = -1;
635*61046927SAndroid Build Coastguard Worker 
636*61046927SAndroid Build Coastguard Worker    if (nodearray_is_sparse(&l->linear[l->spill_node])) {
637*61046927SAndroid Build Coastguard Worker       nodearray_sparse_foreach(&l->linear[l->spill_node], elem) {
638*61046927SAndroid Build Coastguard Worker          unsigned i = nodearray_sparse_key(elem);
639*61046927SAndroid Build Coastguard Worker          unsigned constraint = nodearray_sparse_value(elem);
640*61046927SAndroid Build Coastguard Worker 
641*61046927SAndroid Build Coastguard Worker          /* Only spill nodes that interfere with the node failing
642*61046927SAndroid Build Coastguard Worker           * register allocation. It's pointless to spill anything else */
643*61046927SAndroid Build Coastguard Worker          if (!constraint)
644*61046927SAndroid Build Coastguard Worker             continue;
645*61046927SAndroid Build Coastguard Worker 
646*61046927SAndroid Build Coastguard Worker          if (BITSET_TEST(no_spill, i))
647*61046927SAndroid Build Coastguard Worker             continue;
648*61046927SAndroid Build Coastguard Worker 
649*61046927SAndroid Build Coastguard Worker          unsigned benefit = lcra_count_constraints(l, i);
650*61046927SAndroid Build Coastguard Worker 
651*61046927SAndroid Build Coastguard Worker          if (benefit > best_benefit) {
652*61046927SAndroid Build Coastguard Worker             best_benefit = benefit;
653*61046927SAndroid Build Coastguard Worker             best_node = i;
654*61046927SAndroid Build Coastguard Worker          }
655*61046927SAndroid Build Coastguard Worker       }
656*61046927SAndroid Build Coastguard Worker    } else {
657*61046927SAndroid Build Coastguard Worker       nodearray_value *row = l->linear[l->spill_node].dense;
658*61046927SAndroid Build Coastguard Worker 
659*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < l->node_count; ++i) {
660*61046927SAndroid Build Coastguard Worker          /* Only spill nodes that interfere with the node failing
661*61046927SAndroid Build Coastguard Worker           * register allocation. It's pointless to spill anything else */
662*61046927SAndroid Build Coastguard Worker          if (!row[i])
663*61046927SAndroid Build Coastguard Worker             continue;
664*61046927SAndroid Build Coastguard Worker 
665*61046927SAndroid Build Coastguard Worker          if (BITSET_TEST(no_spill, i))
666*61046927SAndroid Build Coastguard Worker             continue;
667*61046927SAndroid Build Coastguard Worker 
668*61046927SAndroid Build Coastguard Worker          unsigned benefit = lcra_count_constraints(l, i);
669*61046927SAndroid Build Coastguard Worker 
670*61046927SAndroid Build Coastguard Worker          if (benefit > best_benefit) {
671*61046927SAndroid Build Coastguard Worker             best_benefit = benefit;
672*61046927SAndroid Build Coastguard Worker             best_node = i;
673*61046927SAndroid Build Coastguard Worker          }
674*61046927SAndroid Build Coastguard Worker       }
675*61046927SAndroid Build Coastguard Worker    }
676*61046927SAndroid Build Coastguard Worker 
677*61046927SAndroid Build Coastguard Worker    free(no_spill);
678*61046927SAndroid Build Coastguard Worker    return best_node;
679*61046927SAndroid Build Coastguard Worker }
680*61046927SAndroid Build Coastguard Worker 
681*61046927SAndroid Build Coastguard Worker static unsigned
bi_count_read_index(bi_instr * I,bi_index index)682*61046927SAndroid Build Coastguard Worker bi_count_read_index(bi_instr *I, bi_index index)
683*61046927SAndroid Build Coastguard Worker {
684*61046927SAndroid Build Coastguard Worker    unsigned max = 0;
685*61046927SAndroid Build Coastguard Worker 
686*61046927SAndroid Build Coastguard Worker    bi_foreach_src(I, s) {
687*61046927SAndroid Build Coastguard Worker       if (bi_is_equiv(I->src[s], index)) {
688*61046927SAndroid Build Coastguard Worker          unsigned count = bi_count_read_registers(I, s);
689*61046927SAndroid Build Coastguard Worker          max = MAX2(max, count + I->src[s].offset);
690*61046927SAndroid Build Coastguard Worker       }
691*61046927SAndroid Build Coastguard Worker    }
692*61046927SAndroid Build Coastguard Worker 
693*61046927SAndroid Build Coastguard Worker    return max;
694*61046927SAndroid Build Coastguard Worker }
695*61046927SAndroid Build Coastguard Worker 
696*61046927SAndroid Build Coastguard Worker /*
697*61046927SAndroid Build Coastguard Worker  * Wrappers to emit loads/stores to thread-local storage in an appropriate way
698*61046927SAndroid Build Coastguard Worker  * for the target, so the spill/fill code becomes architecture-independent.
699*61046927SAndroid Build Coastguard Worker  */
700*61046927SAndroid Build Coastguard Worker 
static bi_index
bi_tls_ptr(bool hi)
{
   /* Thread-local storage base pointer as a FAU source; hi selects the
    * upper 32 bits of the 64-bit address */
   return bi_fau(BIR_FAU_TLS_PTR, hi);
}
706*61046927SAndroid Build Coastguard Worker 
707*61046927SAndroid Build Coastguard Worker static bi_instr *
bi_load_tl(bi_builder * b,unsigned bits,bi_index src,unsigned offset)708*61046927SAndroid Build Coastguard Worker bi_load_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset)
709*61046927SAndroid Build Coastguard Worker {
710*61046927SAndroid Build Coastguard Worker    if (b->shader->arch >= 9) {
711*61046927SAndroid Build Coastguard Worker       return bi_load_to(b, bits, src, bi_tls_ptr(false), bi_tls_ptr(true),
712*61046927SAndroid Build Coastguard Worker                         BI_SEG_TL, offset);
713*61046927SAndroid Build Coastguard Worker    } else {
714*61046927SAndroid Build Coastguard Worker       return bi_load_to(b, bits, src, bi_imm_u32(offset), bi_zero(), BI_SEG_TL,
715*61046927SAndroid Build Coastguard Worker                         0);
716*61046927SAndroid Build Coastguard Worker    }
717*61046927SAndroid Build Coastguard Worker }
718*61046927SAndroid Build Coastguard Worker 
719*61046927SAndroid Build Coastguard Worker static void
bi_store_tl(bi_builder * b,unsigned bits,bi_index src,unsigned offset)720*61046927SAndroid Build Coastguard Worker bi_store_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset)
721*61046927SAndroid Build Coastguard Worker {
722*61046927SAndroid Build Coastguard Worker    if (b->shader->arch >= 9) {
723*61046927SAndroid Build Coastguard Worker       bi_store(b, bits, src, bi_tls_ptr(false), bi_tls_ptr(true), BI_SEG_TL,
724*61046927SAndroid Build Coastguard Worker                offset);
725*61046927SAndroid Build Coastguard Worker    } else {
726*61046927SAndroid Build Coastguard Worker       bi_store(b, bits, src, bi_imm_u32(offset), bi_zero(), BI_SEG_TL, 0);
727*61046927SAndroid Build Coastguard Worker    }
728*61046927SAndroid Build Coastguard Worker }
729*61046927SAndroid Build Coastguard Worker 
730*61046927SAndroid Build Coastguard Worker /* Once we've chosen a spill node, spill it and returns bytes spilled */
731*61046927SAndroid Build Coastguard Worker 
static unsigned
bi_spill_register(bi_context *ctx, bi_index index, uint32_t offset)
{
   /* Spills the value `index` to thread-local storage at byte `offset`,
    * returning the number of TLS bytes consumed (4 per 32-bit channel). */
   bi_builder b = {.shader = ctx};
   unsigned channels = 0;

   /* Spill after every store, fill before every load */
   bi_foreach_instr_global_safe(ctx, I) {
      bi_foreach_dest(I, d) {
         if (!bi_is_equiv(I->dest[d], index))
            continue;

         /* extra = destination offset into the vector, in 32-bit words */
         unsigned extra = I->dest[d].offset;
         bi_index tmp = bi_temp(ctx);

         /* Redirect the write to a temporary, then store that temporary
          * out to TLS; mark the producer unspillable to avoid re-spilling */
         I->dest[d] = bi_replace_index(I->dest[d], tmp);
         I->no_spill = true;

         unsigned count = bi_count_write_registers(I, d);
         unsigned bits = count * 32;

         b.cursor = bi_after_instr(I);
         bi_store_tl(&b, bits, tmp, offset + 4 * extra);

         ctx->spills++;
         channels = MAX2(channels, extra + count);
      }

      if (bi_has_arg(I, index)) {
         /* Reload into a fresh temporary immediately before the use and
          * rewrite the reading sources to the temporary */
         b.cursor = bi_before_instr(I);
         bi_index tmp = bi_temp(ctx);

         unsigned bits = bi_count_read_index(I, index) * 32;
         bi_rewrite_index_src_single(I, index, tmp);

         /* The fill itself must never be chosen for spilling again */
         bi_instr *ld = bi_load_tl(&b, bits, tmp, offset);
         ld->no_spill = true;
         ctx->fills++;
      }
   }

   return (channels * 4);
}
775*61046927SAndroid Build Coastguard Worker 
776*61046927SAndroid Build Coastguard Worker /*
777*61046927SAndroid Build Coastguard Worker  * For transition, lower collects and splits before RA, rather than after RA.
778*61046927SAndroid Build Coastguard Worker  * LCRA knows how to deal with offsets (broken SSA), but not how to coalesce
779*61046927SAndroid Build Coastguard Worker  * these vector moves.
780*61046927SAndroid Build Coastguard Worker  */
static void
bi_lower_vector(bi_context *ctx, unsigned first_reg)
{
   /* remap[v], for an SSA value v (below first_reg) produced by a split,
    * records the (vector, offset) pair it came from so later readers can
    * bypass the generated moves entirely */
   bi_index *remap = calloc(ctx->ssa_alloc, sizeof(bi_index));

   bi_foreach_instr_global_safe(ctx, I) {
      bi_builder b = bi_init_builder(ctx, bi_after_instr(I));

      if (I->op == BI_OPCODE_SPLIT_I32) {
         /* Split becomes one scalar move per destination, reading
          * successive offsets of the source vector */
         bi_index src = I->src[0];
         assert(src.offset == 0);

         bi_foreach_dest(I, i) {
            src.offset = i;
            bi_mov_i32_to(&b, I->dest[i], src);

            /* Only SSA values (below first_reg) are remappable */
            if (I->dest[i].value < first_reg)
               remap[I->dest[i].value] = src;
         }

         bi_remove_instruction(I);
      } else if (I->op == BI_OPCODE_COLLECT_I32) {
         /* Collect becomes one scalar move per source, writing
          * successive offsets of the destination vector */
         bi_index dest = I->dest[0];
         assert(dest.offset == 0);
         assert(((dest.value < first_reg) || I->nr_srcs == 1) &&
                "nir_lower_phis_to_scalar");

         bi_foreach_src(I, i) {
            if (bi_is_null(I->src[i]))
               continue;

            dest.offset = i;
            bi_mov_i32_to(&b, dest, I->src[i]);
         }

         bi_remove_instruction(I);
      }
   }

   /* Rewrite reads of split results to read the original vector directly,
    * making many of the moves above dead */
   bi_foreach_instr_global(ctx, I) {
      bi_foreach_ssa_src(I, s) {
         if (I->src[s].value < first_reg && !bi_is_null(remap[I->src[s].value]))
            bi_replace_src(I, s, remap[I->src[s].value]);
      }
   }

   free(remap);

   /* After generating a pile of moves, clean up */
   bi_compute_liveness_ra(ctx);

   /* Backwards dead-code elimination over each block, maintaining the
    * live set as we walk so newly-dead moves are removed too */
   bi_foreach_block_rev(ctx, block) {
      uint8_t *live = rzalloc_array(block, uint8_t, ctx->ssa_alloc);

      /* Seed with the union of the successors' live-in sets */
      bi_foreach_successor(block, succ) {
         for (unsigned i = 0; i < ctx->ssa_alloc; ++i)
            live[i] |= succ->live_in[i];
      }

      bi_foreach_instr_in_block_safe_rev(block, ins) {
         bool all_null = true;

         bi_foreach_dest(ins, d) {
            if (live[ins->dest[d].value] & bi_writemask(ins, d))
               all_null = false;
         }

         /* Remove instructions with no live destination, unless they
          * have side effects that must be preserved */
         if (all_null && !bi_side_effects(ins))
            bi_remove_instruction(ins);
         else
            bi_liveness_ins_update_ra(live, ins);
      }

      /* Install the recomputed live-in for this block */
      ralloc_free(block->live_in);
      block->live_in = live;
   }
}
858*61046927SAndroid Build Coastguard Worker 
859*61046927SAndroid Build Coastguard Worker /*
860*61046927SAndroid Build Coastguard Worker  * Check if the instruction requires a "tied" operand. Such instructions MUST
861*61046927SAndroid Build Coastguard Worker  * allocate their source and destination to the same register. This is a
862*61046927SAndroid Build Coastguard Worker  * constraint on RA, and may require extra moves.
863*61046927SAndroid Build Coastguard Worker  *
864*61046927SAndroid Build Coastguard Worker  * In particular, this is the case for Bifrost instructions that both read and
865*61046927SAndroid Build Coastguard Worker  * write with the staging register mechanism.
866*61046927SAndroid Build Coastguard Worker  */
867*61046927SAndroid Build Coastguard Worker static bool
bi_is_tied(const bi_instr * I)868*61046927SAndroid Build Coastguard Worker bi_is_tied(const bi_instr *I)
869*61046927SAndroid Build Coastguard Worker {
870*61046927SAndroid Build Coastguard Worker    return (I->op == BI_OPCODE_TEXC || I->op == BI_OPCODE_TEXC_DUAL ||
871*61046927SAndroid Build Coastguard Worker            I->op == BI_OPCODE_ATOM_RETURN_I32 || I->op == BI_OPCODE_AXCHG_I32 ||
872*61046927SAndroid Build Coastguard Worker            I->op == BI_OPCODE_ACMPXCHG_I32) &&
873*61046927SAndroid Build Coastguard Worker           !bi_is_null(I->src[0]);
874*61046927SAndroid Build Coastguard Worker }
875*61046927SAndroid Build Coastguard Worker 
876*61046927SAndroid Build Coastguard Worker /*
877*61046927SAndroid Build Coastguard Worker  * For transition, coalesce tied operands together, as LCRA knows how to handle
878*61046927SAndroid Build Coastguard Worker  * non-SSA operands but doesn't know about tied operands.
879*61046927SAndroid Build Coastguard Worker  *
880*61046927SAndroid Build Coastguard Worker  * This breaks the SSA form of the program, but that doesn't matter for LCRA.
881*61046927SAndroid Build Coastguard Worker  */
882*61046927SAndroid Build Coastguard Worker static void
bi_coalesce_tied(bi_context * ctx)883*61046927SAndroid Build Coastguard Worker bi_coalesce_tied(bi_context *ctx)
884*61046927SAndroid Build Coastguard Worker {
885*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_global(ctx, I) {
886*61046927SAndroid Build Coastguard Worker       if (!bi_is_tied(I))
887*61046927SAndroid Build Coastguard Worker          continue;
888*61046927SAndroid Build Coastguard Worker 
889*61046927SAndroid Build Coastguard Worker       bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
890*61046927SAndroid Build Coastguard Worker       unsigned n = bi_count_read_registers(I, 0);
891*61046927SAndroid Build Coastguard Worker 
892*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < n; ++i) {
893*61046927SAndroid Build Coastguard Worker          bi_index dst = I->dest[0], src = I->src[0];
894*61046927SAndroid Build Coastguard Worker 
895*61046927SAndroid Build Coastguard Worker          assert(dst.offset == 0 && src.offset == 0);
896*61046927SAndroid Build Coastguard Worker          dst.offset = src.offset = i;
897*61046927SAndroid Build Coastguard Worker 
898*61046927SAndroid Build Coastguard Worker          bi_mov_i32_to(&b, dst, src);
899*61046927SAndroid Build Coastguard Worker       }
900*61046927SAndroid Build Coastguard Worker 
901*61046927SAndroid Build Coastguard Worker       bi_replace_src(I, 0, I->dest[0]);
902*61046927SAndroid Build Coastguard Worker    }
903*61046927SAndroid Build Coastguard Worker }
904*61046927SAndroid Build Coastguard Worker 
/*
 * Look up the remapped index for `value`, allocating a fresh one from
 * `*alloc` on first sight. The map stores indices biased by one so that
 * zero can mean "not yet assigned".
 */
static unsigned
find_or_allocate_temp(unsigned *map, unsigned value, unsigned *alloc)
{
   unsigned slot = map[value];

   if (slot == 0) {
      slot = ++(*alloc);
      map[value] = slot;
   }

   assert(slot != 0);
   return slot - 1;
}
914*61046927SAndroid Build Coastguard Worker 
915*61046927SAndroid Build Coastguard Worker /* Reassigns numbering to get rid of gaps in the indices and to prioritize
916*61046927SAndroid Build Coastguard Worker  * smaller register classes */
917*61046927SAndroid Build Coastguard Worker 
918*61046927SAndroid Build Coastguard Worker static void
squeeze_index(bi_context * ctx)919*61046927SAndroid Build Coastguard Worker squeeze_index(bi_context *ctx)
920*61046927SAndroid Build Coastguard Worker {
921*61046927SAndroid Build Coastguard Worker    unsigned *map = rzalloc_array(ctx, unsigned, ctx->ssa_alloc);
922*61046927SAndroid Build Coastguard Worker    ctx->ssa_alloc = 0;
923*61046927SAndroid Build Coastguard Worker 
924*61046927SAndroid Build Coastguard Worker    bi_foreach_instr_global(ctx, I) {
925*61046927SAndroid Build Coastguard Worker       bi_foreach_dest(I, d)
926*61046927SAndroid Build Coastguard Worker          I->dest[d].value =
927*61046927SAndroid Build Coastguard Worker             find_or_allocate_temp(map, I->dest[d].value, &ctx->ssa_alloc);
928*61046927SAndroid Build Coastguard Worker 
929*61046927SAndroid Build Coastguard Worker       bi_foreach_ssa_src(I, s)
930*61046927SAndroid Build Coastguard Worker          I->src[s].value =
931*61046927SAndroid Build Coastguard Worker             find_or_allocate_temp(map, I->src[s].value, &ctx->ssa_alloc);
932*61046927SAndroid Build Coastguard Worker    }
933*61046927SAndroid Build Coastguard Worker 
934*61046927SAndroid Build Coastguard Worker    ralloc_free(map);
935*61046927SAndroid Build Coastguard Worker }
936*61046927SAndroid Build Coastguard Worker 
/*
 * Brainless out-of-SSA pass. The eventual goal is to go out-of-SSA after RA and
 * coalesce implicitly with biased colouring in a tree scan allocator. For now,
 * this should be good enough for LCRA.
 *
 * Returns the first register index: indices below the return value are still
 * SSA, indices at or above it were allocated by this pass and may be written
 * more than once.
 */
static unsigned
bi_out_of_ssa(bi_context *ctx)
{
   bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
   unsigned first_reg = ctx->ssa_alloc;

   /* Trivially lower phis */
   bi_foreach_block(ctx, block) {
      bi_foreach_instr_in_block_safe(block, I) {
         /* Phis come first in a block, so the first non-phi ends the run */
         if (I->op != BI_OPCODE_PHI)
            break;

         /* Assign a register for the phi */
         bi_index reg = bi_temp(ctx);
         assert(reg.value >= first_reg);

         /* Lower to a move in each predecessor. The destinations
          * cannot interfere so these can be sequentialized
          * in arbitrary order.
          */
         bi_foreach_predecessor(block, pred) {
            bi_builder b = bi_init_builder(ctx, bi_after_block_logical(*pred));
            unsigned i = bi_predecessor_index(block, *pred);

            /* Phi sources are expected to be plain values (no modifiers or
             * swizzles), so a plain 32-bit move suffices */
            assert(!I->src[i].abs);
            assert(!I->src[i].neg);
            assert(I->src[i].swizzle == BI_SWIZZLE_H01);

            /* MOV of immediate needs lowering on Valhall */
            if (ctx->arch >= 9 && I->src[i].type == BI_INDEX_CONSTANT)
               bi_iadd_imm_i32_to(&b, reg, zero, I->src[i].value);
            else
               bi_mov_i32_to(&b, reg, I->src[i]);
         }

         /* Replace the phi with a move */
         bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
         bi_mov_i32_to(&b, I->dest[0], reg);
         bi_remove_instruction(I);

         /* NOTE(review): I is unlinked above but its storage is still read
          * below (I->dest[0]) — presumably bi_remove_instruction only
          * unlinks without freeing; confirm against its definition. */

         /* Propagate that move within the block. The destination
          * is SSA and the source is not written in this block,
          * so this is legal. The move itself will be DCE'd if
          * possible in the next pass.
          */
         bi_foreach_instr_in_block_rev(block, prop) {
            if (prop->op == BI_OPCODE_PHI)
               break;

            bi_foreach_src(prop, s) {
               if (bi_is_equiv(prop->src[s], I->dest[0])) {
                  bi_replace_src(prop, s, reg);
               }
            }
         }
      }
   }

   /* Try to locally propagate the moves we created. We need to be extra
    * careful because we're not in SSA at this point, as such this
    * algorithm is quadratic. This will go away when we go out of SSA after
    * RA.
    */
   BITSET_WORD *used =
      calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));
   BITSET_WORD *multiple_uses =
      calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));

   /* Record which SSA values are read more than once; a multiply-used value
    * cannot be coalesced into a single move's destination below. */
   bi_foreach_instr_global(ctx, I) {
      bi_foreach_ssa_src(I, s) {
         if (BITSET_TEST(used, I->src[s].value))
            BITSET_SET(multiple_uses, I->src[s].value);
         else
            BITSET_SET(used, I->src[s].value);
      }
   }

   bi_foreach_block(ctx, block) {
      bi_foreach_instr_in_block_safe_rev(block, mov) {
         /* Match "reg = ssa" */
         if (mov->op != BI_OPCODE_MOV_I32)
            continue;
         if (mov->dest[0].type != BI_INDEX_NORMAL)
            continue;
         if (mov->dest[0].value < first_reg)
            continue;
         if (!bi_is_ssa(mov->src[0]))
            continue;
         if (mov->src[0].value >= first_reg)
            continue;
         if (BITSET_TEST(multiple_uses, mov->src[0].value))
            continue;

         bool found = false;

         /* Look locally for the write of the SSA */
         bi_foreach_instr_in_block_rev(block, I) {
            bool bail = false;

            bi_foreach_src(I, s) {
               /* Bail: write-after-read */
               if (bi_is_equiv(I->src[s], mov->dest[0]))
                  bail = true;
            }

            if (bail)
               break;

            bi_foreach_dest(I, d) {
               /* Bail: write-after-write */
               if (bi_is_equiv(I->dest[d], mov->dest[0]))
                  break;

               if (!bi_is_equiv(I->dest[d], mov->src[0]))
                  continue;

               /* We found it, replace: the defining instruction now writes
                * the register directly, making the move redundant */
               I->dest[d] = bi_replace_index(I->dest[d], mov->dest[0]);
               found = true;
               break;
            }

            if (found)
               break;
         }

         if (found)
            bi_remove_instruction(mov);
      }
   }

   free(used);
   free(multiple_uses);
   return first_reg;
}
1077*61046927SAndroid Build Coastguard Worker 
1078*61046927SAndroid Build Coastguard Worker void
bi_register_allocate(bi_context * ctx)1079*61046927SAndroid Build Coastguard Worker bi_register_allocate(bi_context *ctx)
1080*61046927SAndroid Build Coastguard Worker {
1081*61046927SAndroid Build Coastguard Worker    struct lcra_state *l = NULL;
1082*61046927SAndroid Build Coastguard Worker    bool success = false;
1083*61046927SAndroid Build Coastguard Worker 
1084*61046927SAndroid Build Coastguard Worker    unsigned iter_count = 1000; /* max iterations */
1085*61046927SAndroid Build Coastguard Worker 
1086*61046927SAndroid Build Coastguard Worker    /* Number of bytes of memory we've spilled into */
1087*61046927SAndroid Build Coastguard Worker    unsigned spill_count = ctx->info.tls_size;
1088*61046927SAndroid Build Coastguard Worker 
1089*61046927SAndroid Build Coastguard Worker    if (ctx->arch >= 9)
1090*61046927SAndroid Build Coastguard Worker       va_lower_split_64bit(ctx);
1091*61046927SAndroid Build Coastguard Worker 
1092*61046927SAndroid Build Coastguard Worker    /* Lower tied operands. SSA is broken from here on. */
1093*61046927SAndroid Build Coastguard Worker    unsigned first_reg = bi_out_of_ssa(ctx);
1094*61046927SAndroid Build Coastguard Worker    bi_lower_vector(ctx, first_reg);
1095*61046927SAndroid Build Coastguard Worker    bi_coalesce_tied(ctx);
1096*61046927SAndroid Build Coastguard Worker    squeeze_index(ctx);
1097*61046927SAndroid Build Coastguard Worker 
1098*61046927SAndroid Build Coastguard Worker    /* Try with reduced register pressure to improve thread count */
1099*61046927SAndroid Build Coastguard Worker    if (ctx->arch >= 7) {
1100*61046927SAndroid Build Coastguard Worker       l = bi_allocate_registers(ctx, &success, false);
1101*61046927SAndroid Build Coastguard Worker 
1102*61046927SAndroid Build Coastguard Worker       if (success) {
1103*61046927SAndroid Build Coastguard Worker          ctx->info.work_reg_count = 32;
1104*61046927SAndroid Build Coastguard Worker       } else {
1105*61046927SAndroid Build Coastguard Worker          lcra_free(l);
1106*61046927SAndroid Build Coastguard Worker          l = NULL;
1107*61046927SAndroid Build Coastguard Worker       }
1108*61046927SAndroid Build Coastguard Worker    }
1109*61046927SAndroid Build Coastguard Worker 
1110*61046927SAndroid Build Coastguard Worker    /* Otherwise, use the register file and spill until we succeed */
1111*61046927SAndroid Build Coastguard Worker    while (!success && ((iter_count--) > 0)) {
1112*61046927SAndroid Build Coastguard Worker       l = bi_allocate_registers(ctx, &success, true);
1113*61046927SAndroid Build Coastguard Worker 
1114*61046927SAndroid Build Coastguard Worker       if (success) {
1115*61046927SAndroid Build Coastguard Worker          ctx->info.work_reg_count = 64;
1116*61046927SAndroid Build Coastguard Worker       } else {
1117*61046927SAndroid Build Coastguard Worker          signed spill_node = bi_choose_spill_node(ctx, l);
1118*61046927SAndroid Build Coastguard Worker          lcra_free(l);
1119*61046927SAndroid Build Coastguard Worker          l = NULL;
1120*61046927SAndroid Build Coastguard Worker 
1121*61046927SAndroid Build Coastguard Worker          if (spill_node == -1)
1122*61046927SAndroid Build Coastguard Worker             unreachable("Failed to choose spill node\n");
1123*61046927SAndroid Build Coastguard Worker 
1124*61046927SAndroid Build Coastguard Worker          if (ctx->inputs->is_blend)
1125*61046927SAndroid Build Coastguard Worker             unreachable("Blend shaders may not spill");
1126*61046927SAndroid Build Coastguard Worker 
1127*61046927SAndroid Build Coastguard Worker          /* By default, we use packed TLS addressing on Valhall.
1128*61046927SAndroid Build Coastguard Worker           * We cannot cross 16 byte boundaries with packed TLS
1129*61046927SAndroid Build Coastguard Worker           * addressing. Align to ensure this doesn't happen. This
1130*61046927SAndroid Build Coastguard Worker           * could be optimized a bit.
1131*61046927SAndroid Build Coastguard Worker           */
1132*61046927SAndroid Build Coastguard Worker          if (ctx->arch >= 9)
1133*61046927SAndroid Build Coastguard Worker             spill_count = ALIGN_POT(spill_count, 16);
1134*61046927SAndroid Build Coastguard Worker 
1135*61046927SAndroid Build Coastguard Worker          spill_count +=
1136*61046927SAndroid Build Coastguard Worker             bi_spill_register(ctx, bi_get_index(spill_node), spill_count);
1137*61046927SAndroid Build Coastguard Worker 
1138*61046927SAndroid Build Coastguard Worker          /* In case the spill affected an instruction with tied
1139*61046927SAndroid Build Coastguard Worker           * operands, we need to fix up.
1140*61046927SAndroid Build Coastguard Worker           */
1141*61046927SAndroid Build Coastguard Worker          bi_coalesce_tied(ctx);
1142*61046927SAndroid Build Coastguard Worker       }
1143*61046927SAndroid Build Coastguard Worker    }
1144*61046927SAndroid Build Coastguard Worker 
1145*61046927SAndroid Build Coastguard Worker    assert(success);
1146*61046927SAndroid Build Coastguard Worker    assert(l != NULL);
1147*61046927SAndroid Build Coastguard Worker 
1148*61046927SAndroid Build Coastguard Worker    ctx->info.tls_size = spill_count;
1149*61046927SAndroid Build Coastguard Worker    bi_install_registers(ctx, l);
1150*61046927SAndroid Build Coastguard Worker 
1151*61046927SAndroid Build Coastguard Worker    lcra_free(l);
1152*61046927SAndroid Build Coastguard Worker }
1153