xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_ra.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2021 Valve Corporation
3*61046927SAndroid Build Coastguard Worker  * Copyright © 2014 Rob Clark <[email protected]>
4*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
5*61046927SAndroid Build Coastguard Worker  */
6*61046927SAndroid Build Coastguard Worker 
7*61046927SAndroid Build Coastguard Worker #include "ir3_ra.h"
8*61046927SAndroid Build Coastguard Worker #include "util/rb_tree.h"
9*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
10*61046927SAndroid Build Coastguard Worker #include "ir3_shader.h"
11*61046927SAndroid Build Coastguard Worker 
12*61046927SAndroid Build Coastguard Worker /* This file implements an SSA-based register allocator. Unlike other
13*61046927SAndroid Build Coastguard Worker  * SSA-based allocators, it handles vector split/collect "smartly," meaning
14*61046927SAndroid Build Coastguard Worker  * that multiple values may share the same register interval. From the
15*61046927SAndroid Build Coastguard Worker  * perspective of the allocator itself, only the top-level intervals matter,
16*61046927SAndroid Build Coastguard Worker  * and the allocator is only concerned with allocating top-level intervals,
17*61046927SAndroid Build Coastguard Worker  * which may mean moving other top-level intervals around. Other intervals,
18*61046927SAndroid Build Coastguard Worker  * like the destination of a split instruction or the source of a collect
19*61046927SAndroid Build Coastguard Worker  * instruction, are "locked" to their parent interval. The details of this are
20*61046927SAndroid Build Coastguard Worker  * mostly handled by ir3_merge_regs and ir3_reg_ctx.
21*61046927SAndroid Build Coastguard Worker  *
22*61046927SAndroid Build Coastguard Worker  * We currently don't do any backtracking, but we do use the merge sets as a
23*61046927SAndroid Build Coastguard Worker  * form of affinity to try to avoid moves from phis/splits/collects. Each
24*61046927SAndroid Build Coastguard Worker  * merge set is what a more "classic" graph-coloring or live-range based
25*61046927SAndroid Build Coastguard Worker  * allocator would consider a single register, but here we use it as merely a
26*61046927SAndroid Build Coastguard Worker  * hint, except when multiple overlapping values are live at the same time.
27*61046927SAndroid Build Coastguard Worker  * Each merge set has a "preferred" register, and we try to honor that when
28*61046927SAndroid Build Coastguard Worker  * allocating values in the merge set.
29*61046927SAndroid Build Coastguard Worker  */
30*61046927SAndroid Build Coastguard Worker 
31*61046927SAndroid Build Coastguard Worker /* ir3_reg_ctx implementation. */
32*61046927SAndroid Build Coastguard Worker 
33*61046927SAndroid Build Coastguard Worker static int
ir3_reg_interval_cmp(const struct rb_node * node,const void * data)34*61046927SAndroid Build Coastguard Worker ir3_reg_interval_cmp(const struct rb_node *node, const void *data)
35*61046927SAndroid Build Coastguard Worker {
36*61046927SAndroid Build Coastguard Worker    unsigned reg = *(const unsigned *)data;
37*61046927SAndroid Build Coastguard Worker    const struct ir3_reg_interval *interval =
38*61046927SAndroid Build Coastguard Worker       ir3_rb_node_to_interval_const(node);
39*61046927SAndroid Build Coastguard Worker    if (interval->reg->interval_start > reg)
40*61046927SAndroid Build Coastguard Worker       return -1;
41*61046927SAndroid Build Coastguard Worker    else if (interval->reg->interval_end <= reg)
42*61046927SAndroid Build Coastguard Worker       return 1;
43*61046927SAndroid Build Coastguard Worker    else
44*61046927SAndroid Build Coastguard Worker       return 0;
45*61046927SAndroid Build Coastguard Worker }
46*61046927SAndroid Build Coastguard Worker 
47*61046927SAndroid Build Coastguard Worker static struct ir3_reg_interval *
ir3_reg_interval_search(struct rb_tree * tree,unsigned offset)48*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search(struct rb_tree *tree, unsigned offset)
49*61046927SAndroid Build Coastguard Worker {
50*61046927SAndroid Build Coastguard Worker    struct rb_node *node = rb_tree_search(tree, &offset, ir3_reg_interval_cmp);
51*61046927SAndroid Build Coastguard Worker    return node ? ir3_rb_node_to_interval(node) : NULL;
52*61046927SAndroid Build Coastguard Worker }
53*61046927SAndroid Build Coastguard Worker 
54*61046927SAndroid Build Coastguard Worker static struct ir3_reg_interval *
ir3_reg_interval_search_sloppy(struct rb_tree * tree,unsigned offset)55*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search_sloppy(struct rb_tree *tree, unsigned offset)
56*61046927SAndroid Build Coastguard Worker {
57*61046927SAndroid Build Coastguard Worker    struct rb_node *node =
58*61046927SAndroid Build Coastguard Worker       rb_tree_search_sloppy(tree, &offset, ir3_reg_interval_cmp);
59*61046927SAndroid Build Coastguard Worker    return node ? ir3_rb_node_to_interval(node) : NULL;
60*61046927SAndroid Build Coastguard Worker }
61*61046927SAndroid Build Coastguard Worker 
62*61046927SAndroid Build Coastguard Worker /* Get the interval covering the reg, or the closest to the right if it
63*61046927SAndroid Build Coastguard Worker  * doesn't exist.
64*61046927SAndroid Build Coastguard Worker  */
65*61046927SAndroid Build Coastguard Worker static struct ir3_reg_interval *
ir3_reg_interval_search_right(struct rb_tree * tree,unsigned offset)66*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search_right(struct rb_tree *tree, unsigned offset)
67*61046927SAndroid Build Coastguard Worker {
68*61046927SAndroid Build Coastguard Worker    struct ir3_reg_interval *interval =
69*61046927SAndroid Build Coastguard Worker       ir3_reg_interval_search_sloppy(tree, offset);
70*61046927SAndroid Build Coastguard Worker    if (!interval) {
71*61046927SAndroid Build Coastguard Worker       return NULL;
72*61046927SAndroid Build Coastguard Worker    } else if (interval->reg->interval_end > offset) {
73*61046927SAndroid Build Coastguard Worker       return interval;
74*61046927SAndroid Build Coastguard Worker    } else {
75*61046927SAndroid Build Coastguard Worker       /* There is no interval covering reg, and ra_file_search_sloppy()
76*61046927SAndroid Build Coastguard Worker        * returned the closest range to the left, so the next interval to the
77*61046927SAndroid Build Coastguard Worker        * right should be the closest to the right.
78*61046927SAndroid Build Coastguard Worker        */
79*61046927SAndroid Build Coastguard Worker       return ir3_reg_interval_next_or_null(interval);
80*61046927SAndroid Build Coastguard Worker    }
81*61046927SAndroid Build Coastguard Worker }
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker static int
ir3_reg_interval_insert_cmp(const struct rb_node * _a,const struct rb_node * _b)84*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b)
85*61046927SAndroid Build Coastguard Worker {
86*61046927SAndroid Build Coastguard Worker    const struct ir3_reg_interval *a = ir3_rb_node_to_interval_const(_a);
87*61046927SAndroid Build Coastguard Worker    const struct ir3_reg_interval *b = ir3_rb_node_to_interval_const(_b);
88*61046927SAndroid Build Coastguard Worker    return b->reg->interval_start - a->reg->interval_start;
89*61046927SAndroid Build Coastguard Worker }
90*61046927SAndroid Build Coastguard Worker 
91*61046927SAndroid Build Coastguard Worker static void
interval_insert(struct ir3_reg_ctx * ctx,struct rb_tree * tree,struct ir3_reg_interval * interval)92*61046927SAndroid Build Coastguard Worker interval_insert(struct ir3_reg_ctx *ctx, struct rb_tree *tree,
93*61046927SAndroid Build Coastguard Worker                 struct ir3_reg_interval *interval)
94*61046927SAndroid Build Coastguard Worker {
95*61046927SAndroid Build Coastguard Worker    struct ir3_reg_interval *right =
96*61046927SAndroid Build Coastguard Worker       ir3_reg_interval_search_right(tree, interval->reg->interval_start);
97*61046927SAndroid Build Coastguard Worker    if (right && right->reg->interval_start < interval->reg->interval_end) {
98*61046927SAndroid Build Coastguard Worker       /* We disallow trees where different members have different half-ness.
99*61046927SAndroid Build Coastguard Worker        * This means that we can't treat bitcasts as copies like normal
100*61046927SAndroid Build Coastguard Worker        * split/collect, so something like this would require an extra copy
101*61046927SAndroid Build Coastguard Worker        * in mergedregs mode, and count as 4 half-units of register pressure
102*61046927SAndroid Build Coastguard Worker        * instead of 2:
103*61046927SAndroid Build Coastguard Worker        *
104*61046927SAndroid Build Coastguard Worker        * f16vec2 foo = unpackFloat2x16(bar)
105*61046927SAndroid Build Coastguard Worker        * ... = foo.x
106*61046927SAndroid Build Coastguard Worker        * ... = bar
107*61046927SAndroid Build Coastguard Worker        *
108*61046927SAndroid Build Coastguard Worker        * However, relaxing this rule would open a huge can of worms. What
109*61046927SAndroid Build Coastguard Worker        * happens when there's a vector of 16 things, and the fifth element
110*61046927SAndroid Build Coastguard Worker        * has been bitcasted as a half-reg? Would that element alone have to
111*61046927SAndroid Build Coastguard Worker        * be small enough to be used as a half-reg source? Let's keep that
112*61046927SAndroid Build Coastguard Worker        * can of worms firmly shut for now.
113*61046927SAndroid Build Coastguard Worker        */
114*61046927SAndroid Build Coastguard Worker       assert((interval->reg->flags & IR3_REG_HALF) ==
115*61046927SAndroid Build Coastguard Worker              (right->reg->flags & IR3_REG_HALF));
116*61046927SAndroid Build Coastguard Worker 
117*61046927SAndroid Build Coastguard Worker       if (right->reg->interval_end <= interval->reg->interval_end &&
118*61046927SAndroid Build Coastguard Worker           right->reg->interval_start >= interval->reg->interval_start) {
119*61046927SAndroid Build Coastguard Worker          /* Check if we're inserting something that's already inserted */
120*61046927SAndroid Build Coastguard Worker          assert(interval != right);
121*61046927SAndroid Build Coastguard Worker 
122*61046927SAndroid Build Coastguard Worker          /* "right" is contained in "interval" and must become a child of
123*61046927SAndroid Build Coastguard Worker           * it. There may be further children too.
124*61046927SAndroid Build Coastguard Worker           */
125*61046927SAndroid Build Coastguard Worker          for (struct ir3_reg_interval *next = ir3_reg_interval_next(right);
126*61046927SAndroid Build Coastguard Worker               right && right->reg->interval_start < interval->reg->interval_end;
127*61046927SAndroid Build Coastguard Worker               right = next, next = ir3_reg_interval_next_or_null(next)) {
128*61046927SAndroid Build Coastguard Worker             /* "right" must be contained in "interval." */
129*61046927SAndroid Build Coastguard Worker             assert(right->reg->interval_end <= interval->reg->interval_end);
130*61046927SAndroid Build Coastguard Worker             assert((interval->reg->flags & IR3_REG_HALF) ==
131*61046927SAndroid Build Coastguard Worker                    (right->reg->flags & IR3_REG_HALF));
132*61046927SAndroid Build Coastguard Worker             if (!right->parent)
133*61046927SAndroid Build Coastguard Worker                ctx->interval_delete(ctx, right);
134*61046927SAndroid Build Coastguard Worker             right->parent = interval;
135*61046927SAndroid Build Coastguard Worker             rb_tree_remove(tree, &right->node);
136*61046927SAndroid Build Coastguard Worker             rb_tree_insert(&interval->children, &right->node,
137*61046927SAndroid Build Coastguard Worker                            ir3_reg_interval_insert_cmp);
138*61046927SAndroid Build Coastguard Worker          }
139*61046927SAndroid Build Coastguard Worker       } else {
140*61046927SAndroid Build Coastguard Worker          /* "right" must contain "interval," since intervals must form a
141*61046927SAndroid Build Coastguard Worker           * tree.
142*61046927SAndroid Build Coastguard Worker           */
143*61046927SAndroid Build Coastguard Worker          assert(right->reg->interval_start <= interval->reg->interval_start);
144*61046927SAndroid Build Coastguard Worker          interval->parent = right;
145*61046927SAndroid Build Coastguard Worker          interval_insert(ctx, &right->children, interval);
146*61046927SAndroid Build Coastguard Worker          return;
147*61046927SAndroid Build Coastguard Worker       }
148*61046927SAndroid Build Coastguard Worker    }
149*61046927SAndroid Build Coastguard Worker 
150*61046927SAndroid Build Coastguard Worker    if (!interval->parent)
151*61046927SAndroid Build Coastguard Worker       ctx->interval_add(ctx, interval);
152*61046927SAndroid Build Coastguard Worker    rb_tree_insert(tree, &interval->node, ir3_reg_interval_insert_cmp);
153*61046927SAndroid Build Coastguard Worker    interval->inserted = true;
154*61046927SAndroid Build Coastguard Worker }
155*61046927SAndroid Build Coastguard Worker 
156*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_insert(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)157*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
158*61046927SAndroid Build Coastguard Worker                         struct ir3_reg_interval *interval)
159*61046927SAndroid Build Coastguard Worker {
160*61046927SAndroid Build Coastguard Worker    rb_tree_init(&interval->children);
161*61046927SAndroid Build Coastguard Worker    interval->parent = NULL;
162*61046927SAndroid Build Coastguard Worker    interval_insert(ctx, &ctx->intervals, interval);
163*61046927SAndroid Build Coastguard Worker }
164*61046927SAndroid Build Coastguard Worker 
165*61046927SAndroid Build Coastguard Worker /* Call after ir3_reg_interval_remove_temp() to reinsert the interval */
166*61046927SAndroid Build Coastguard Worker static void
ir3_reg_interval_reinsert(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)167*61046927SAndroid Build Coastguard Worker ir3_reg_interval_reinsert(struct ir3_reg_ctx *ctx,
168*61046927SAndroid Build Coastguard Worker                           struct ir3_reg_interval *interval)
169*61046927SAndroid Build Coastguard Worker {
170*61046927SAndroid Build Coastguard Worker    interval->parent = NULL;
171*61046927SAndroid Build Coastguard Worker    interval_insert(ctx, &ctx->intervals, interval);
172*61046927SAndroid Build Coastguard Worker }
173*61046927SAndroid Build Coastguard Worker 
174*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_remove(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)175*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
176*61046927SAndroid Build Coastguard Worker                         struct ir3_reg_interval *interval)
177*61046927SAndroid Build Coastguard Worker {
178*61046927SAndroid Build Coastguard Worker    assert(interval->inserted);
179*61046927SAndroid Build Coastguard Worker 
180*61046927SAndroid Build Coastguard Worker    if (interval->parent) {
181*61046927SAndroid Build Coastguard Worker       rb_tree_remove(&interval->parent->children, &interval->node);
182*61046927SAndroid Build Coastguard Worker    } else {
183*61046927SAndroid Build Coastguard Worker       ctx->interval_delete(ctx, interval);
184*61046927SAndroid Build Coastguard Worker       rb_tree_remove(&ctx->intervals, &interval->node);
185*61046927SAndroid Build Coastguard Worker    }
186*61046927SAndroid Build Coastguard Worker 
187*61046927SAndroid Build Coastguard Worker    rb_tree_foreach_safe (struct ir3_reg_interval, child, &interval->children,
188*61046927SAndroid Build Coastguard Worker                          node) {
189*61046927SAndroid Build Coastguard Worker       rb_tree_remove(&interval->children, &child->node);
190*61046927SAndroid Build Coastguard Worker       child->parent = interval->parent;
191*61046927SAndroid Build Coastguard Worker 
192*61046927SAndroid Build Coastguard Worker       if (interval->parent) {
193*61046927SAndroid Build Coastguard Worker          rb_tree_insert(&child->parent->children, &child->node,
194*61046927SAndroid Build Coastguard Worker                         ir3_reg_interval_insert_cmp);
195*61046927SAndroid Build Coastguard Worker       } else {
196*61046927SAndroid Build Coastguard Worker          ctx->interval_readd(ctx, interval, child);
197*61046927SAndroid Build Coastguard Worker          rb_tree_insert(&ctx->intervals, &child->node,
198*61046927SAndroid Build Coastguard Worker                         ir3_reg_interval_insert_cmp);
199*61046927SAndroid Build Coastguard Worker       }
200*61046927SAndroid Build Coastguard Worker    }
201*61046927SAndroid Build Coastguard Worker 
202*61046927SAndroid Build Coastguard Worker    interval->inserted = false;
203*61046927SAndroid Build Coastguard Worker }
204*61046927SAndroid Build Coastguard Worker 
205*61046927SAndroid Build Coastguard Worker static void
_mark_free(struct ir3_reg_interval * interval)206*61046927SAndroid Build Coastguard Worker _mark_free(struct ir3_reg_interval *interval)
207*61046927SAndroid Build Coastguard Worker {
208*61046927SAndroid Build Coastguard Worker    interval->inserted = false;
209*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) {
210*61046927SAndroid Build Coastguard Worker       _mark_free(child);
211*61046927SAndroid Build Coastguard Worker    }
212*61046927SAndroid Build Coastguard Worker }
213*61046927SAndroid Build Coastguard Worker 
214*61046927SAndroid Build Coastguard Worker /* Remove an interval and all its children from the tree. */
215*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_remove_all(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)216*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
217*61046927SAndroid Build Coastguard Worker                             struct ir3_reg_interval *interval)
218*61046927SAndroid Build Coastguard Worker {
219*61046927SAndroid Build Coastguard Worker    assert(!interval->parent);
220*61046927SAndroid Build Coastguard Worker 
221*61046927SAndroid Build Coastguard Worker    ctx->interval_delete(ctx, interval);
222*61046927SAndroid Build Coastguard Worker    rb_tree_remove(&ctx->intervals, &interval->node);
223*61046927SAndroid Build Coastguard Worker    _mark_free(interval);
224*61046927SAndroid Build Coastguard Worker }
225*61046927SAndroid Build Coastguard Worker 
226*61046927SAndroid Build Coastguard Worker /* Used when popping an interval to be shuffled around. Don't disturb children
227*61046927SAndroid Build Coastguard Worker  * so that it can be later reinserted.
228*61046927SAndroid Build Coastguard Worker  */
229*61046927SAndroid Build Coastguard Worker static void
ir3_reg_interval_remove_temp(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)230*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove_temp(struct ir3_reg_ctx *ctx,
231*61046927SAndroid Build Coastguard Worker                              struct ir3_reg_interval *interval)
232*61046927SAndroid Build Coastguard Worker {
233*61046927SAndroid Build Coastguard Worker    assert(!interval->parent);
234*61046927SAndroid Build Coastguard Worker 
235*61046927SAndroid Build Coastguard Worker    ctx->interval_delete(ctx, interval);
236*61046927SAndroid Build Coastguard Worker    rb_tree_remove(&ctx->intervals, &interval->node);
237*61046927SAndroid Build Coastguard Worker }
238*61046927SAndroid Build Coastguard Worker 
239*61046927SAndroid Build Coastguard Worker static void
interval_dump(struct log_stream * stream,struct ir3_reg_interval * interval,unsigned indent)240*61046927SAndroid Build Coastguard Worker interval_dump(struct log_stream *stream, struct ir3_reg_interval *interval,
241*61046927SAndroid Build Coastguard Worker               unsigned indent)
242*61046927SAndroid Build Coastguard Worker {
243*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < indent; i++)
244*61046927SAndroid Build Coastguard Worker       mesa_log_stream_printf(stream, "\t");
245*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "reg %u start %u\n", interval->reg->name,
246*61046927SAndroid Build Coastguard Worker                           interval->reg->interval_start);
247*61046927SAndroid Build Coastguard Worker 
248*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) {
249*61046927SAndroid Build Coastguard Worker       interval_dump(stream, child, indent + 1);
250*61046927SAndroid Build Coastguard Worker    }
251*61046927SAndroid Build Coastguard Worker 
252*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < indent; i++)
253*61046927SAndroid Build Coastguard Worker       mesa_log_stream_printf(stream, "\t");
254*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "reg %u end %u\n", interval->reg->name,
255*61046927SAndroid Build Coastguard Worker                           interval->reg->interval_end);
256*61046927SAndroid Build Coastguard Worker }
257*61046927SAndroid Build Coastguard Worker 
258*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_dump(struct log_stream * stream,struct ir3_reg_interval * interval)259*61046927SAndroid Build Coastguard Worker ir3_reg_interval_dump(struct log_stream *stream, struct ir3_reg_interval *interval)
260*61046927SAndroid Build Coastguard Worker {
261*61046927SAndroid Build Coastguard Worker    interval_dump(stream, interval, 0);
262*61046927SAndroid Build Coastguard Worker }
263*61046927SAndroid Build Coastguard Worker 
264*61046927SAndroid Build Coastguard Worker /* These are the core datastructures used by the register allocator. First
265*61046927SAndroid Build Coastguard Worker  * ra_interval and ra_file, which are used for intra-block tracking and use
266*61046927SAndroid Build Coastguard Worker  * the ir3_reg_ctx infrastructure:
267*61046927SAndroid Build Coastguard Worker  */
268*61046927SAndroid Build Coastguard Worker 
269*61046927SAndroid Build Coastguard Worker struct ra_interval {
270*61046927SAndroid Build Coastguard Worker    struct ir3_reg_interval interval;
271*61046927SAndroid Build Coastguard Worker 
272*61046927SAndroid Build Coastguard Worker    struct rb_node physreg_node;
273*61046927SAndroid Build Coastguard Worker    physreg_t physreg_start, physreg_end;
274*61046927SAndroid Build Coastguard Worker 
275*61046927SAndroid Build Coastguard Worker    /* True if this is a source of the current instruction which is entirely
276*61046927SAndroid Build Coastguard Worker     * killed. This means we can allocate the dest over it, but we can't break
277*61046927SAndroid Build Coastguard Worker     * it up.
278*61046927SAndroid Build Coastguard Worker     */
279*61046927SAndroid Build Coastguard Worker    bool is_killed;
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker    /* True if this interval cannot be moved from its position. This is only
282*61046927SAndroid Build Coastguard Worker     * used for precolored inputs to ensure that other inputs don't get
283*61046927SAndroid Build Coastguard Worker     * allocated on top of them.
284*61046927SAndroid Build Coastguard Worker     */
285*61046927SAndroid Build Coastguard Worker    bool frozen;
286*61046927SAndroid Build Coastguard Worker };
287*61046927SAndroid Build Coastguard Worker 
288*61046927SAndroid Build Coastguard Worker struct ra_file {
289*61046927SAndroid Build Coastguard Worker    struct ir3_reg_ctx reg_ctx;
290*61046927SAndroid Build Coastguard Worker 
291*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
292*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE);
293*61046927SAndroid Build Coastguard Worker 
294*61046927SAndroid Build Coastguard Worker    struct rb_tree physreg_intervals;
295*61046927SAndroid Build Coastguard Worker 
296*61046927SAndroid Build Coastguard Worker    unsigned size;
297*61046927SAndroid Build Coastguard Worker    unsigned start;
298*61046927SAndroid Build Coastguard Worker };
299*61046927SAndroid Build Coastguard Worker 
300*61046927SAndroid Build Coastguard Worker /* State for inter-block tracking. When we split a live range to make space
301*61046927SAndroid Build Coastguard Worker  * for a vector, we may need to insert fixup code when a block has multiple
302*61046927SAndroid Build Coastguard Worker  * predecessors that have moved the same live value to different registers.
303*61046927SAndroid Build Coastguard Worker  * This keeps track of state required to do that.
304*61046927SAndroid Build Coastguard Worker  */
305*61046927SAndroid Build Coastguard Worker 
306*61046927SAndroid Build Coastguard Worker struct ra_block_state {
307*61046927SAndroid Build Coastguard Worker    /* Map of defining ir3_register -> physreg it was allocated to at the end
308*61046927SAndroid Build Coastguard Worker     * of the block.
309*61046927SAndroid Build Coastguard Worker     */
310*61046927SAndroid Build Coastguard Worker    struct hash_table *renames;
311*61046927SAndroid Build Coastguard Worker 
312*61046927SAndroid Build Coastguard Worker    /* For loops, we need to process a block before all its predecessors have
313*61046927SAndroid Build Coastguard Worker     * been processed. In particular, we need to pick registers for values
314*61046927SAndroid Build Coastguard Worker     * without knowing if all the predecessors have been renamed. This keeps
315*61046927SAndroid Build Coastguard Worker     * track of the registers we chose so that when we visit the back-edge we
316*61046927SAndroid Build Coastguard Worker     * can move them appropriately. If all predecessors have been visited
317*61046927SAndroid Build Coastguard Worker     * before this block is visited then we don't need to fill this out. This
318*61046927SAndroid Build Coastguard Worker     * is a map from ir3_register -> physreg.
319*61046927SAndroid Build Coastguard Worker     */
320*61046927SAndroid Build Coastguard Worker    struct hash_table *entry_regs;
321*61046927SAndroid Build Coastguard Worker 
322*61046927SAndroid Build Coastguard Worker    /* True if the block has been visited and "renames" is complete.
323*61046927SAndroid Build Coastguard Worker     */
324*61046927SAndroid Build Coastguard Worker    bool visited;
325*61046927SAndroid Build Coastguard Worker };
326*61046927SAndroid Build Coastguard Worker 
327*61046927SAndroid Build Coastguard Worker struct ra_parallel_copy {
328*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval;
329*61046927SAndroid Build Coastguard Worker    physreg_t src;
330*61046927SAndroid Build Coastguard Worker };
331*61046927SAndroid Build Coastguard Worker 
332*61046927SAndroid Build Coastguard Worker /* The main context: */
333*61046927SAndroid Build Coastguard Worker 
334*61046927SAndroid Build Coastguard Worker struct ra_ctx {
335*61046927SAndroid Build Coastguard Worker    /* r0.x - r47.w. On a6xx with merged-regs, hr0.x-hr47.w go into the bottom
336*61046927SAndroid Build Coastguard Worker     * half of this file too.
337*61046927SAndroid Build Coastguard Worker     */
338*61046927SAndroid Build Coastguard Worker    struct ra_file full;
339*61046927SAndroid Build Coastguard Worker 
340*61046927SAndroid Build Coastguard Worker    /* hr0.x - hr63.w, only used without merged-regs. */
341*61046927SAndroid Build Coastguard Worker    struct ra_file half;
342*61046927SAndroid Build Coastguard Worker 
343*61046927SAndroid Build Coastguard Worker    /* Shared regs. */
344*61046927SAndroid Build Coastguard Worker    struct ra_file shared;
345*61046927SAndroid Build Coastguard Worker 
346*61046927SAndroid Build Coastguard Worker    struct ir3_liveness *live;
347*61046927SAndroid Build Coastguard Worker 
348*61046927SAndroid Build Coastguard Worker    struct ir3_block *block;
349*61046927SAndroid Build Coastguard Worker 
350*61046927SAndroid Build Coastguard Worker    const struct ir3_compiler *compiler;
351*61046927SAndroid Build Coastguard Worker    gl_shader_stage stage;
352*61046927SAndroid Build Coastguard Worker 
353*61046927SAndroid Build Coastguard Worker    /* Pending moves of top-level intervals that will be emitted once we're
354*61046927SAndroid Build Coastguard Worker     * finished:
355*61046927SAndroid Build Coastguard Worker     */
356*61046927SAndroid Build Coastguard Worker    DECLARE_ARRAY(struct ra_parallel_copy, parallel_copies);
357*61046927SAndroid Build Coastguard Worker 
358*61046927SAndroid Build Coastguard Worker    struct ra_interval *intervals;
359*61046927SAndroid Build Coastguard Worker    struct ra_block_state *blocks;
360*61046927SAndroid Build Coastguard Worker 
361*61046927SAndroid Build Coastguard Worker    bool merged_regs;
362*61046927SAndroid Build Coastguard Worker };
363*61046927SAndroid Build Coastguard Worker 
364*61046927SAndroid Build Coastguard Worker #define foreach_interval(interval, file)                                       \
365*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals,  \
366*61046927SAndroid Build Coastguard Worker                     physreg_node)
367*61046927SAndroid Build Coastguard Worker #define foreach_interval_rev(interval, file)                                   \
368*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals,  \
369*61046927SAndroid Build Coastguard Worker                     physreg_node)
370*61046927SAndroid Build Coastguard Worker #define foreach_interval_safe(interval, file)                                  \
371*61046927SAndroid Build Coastguard Worker    rb_tree_foreach_safe (struct ra_interval, interval,                         \
372*61046927SAndroid Build Coastguard Worker                          &(file)->physreg_intervals, physreg_node)
373*61046927SAndroid Build Coastguard Worker #define foreach_interval_rev_safe(interval, file)                              \
374*61046927SAndroid Build Coastguard Worker    rb_tree_foreach_rev_safe(struct ra_interval, interval,                      \
375*61046927SAndroid Build Coastguard Worker                             &(file)->physreg_intervals, physreg_node)
376*61046927SAndroid Build Coastguard Worker 
377*61046927SAndroid Build Coastguard Worker static struct ra_interval *
rb_node_to_interval(struct rb_node * node)378*61046927SAndroid Build Coastguard Worker rb_node_to_interval(struct rb_node *node)
379*61046927SAndroid Build Coastguard Worker {
380*61046927SAndroid Build Coastguard Worker    return rb_node_data(struct ra_interval, node, physreg_node);
381*61046927SAndroid Build Coastguard Worker }
382*61046927SAndroid Build Coastguard Worker 
383*61046927SAndroid Build Coastguard Worker static const struct ra_interval *
rb_node_to_interval_const(const struct rb_node * node)384*61046927SAndroid Build Coastguard Worker rb_node_to_interval_const(const struct rb_node *node)
385*61046927SAndroid Build Coastguard Worker {
386*61046927SAndroid Build Coastguard Worker    return rb_node_data(struct ra_interval, node, physreg_node);
387*61046927SAndroid Build Coastguard Worker }
388*61046927SAndroid Build Coastguard Worker 
389*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_next(struct ra_interval * interval)390*61046927SAndroid Build Coastguard Worker ra_interval_next(struct ra_interval *interval)
391*61046927SAndroid Build Coastguard Worker {
392*61046927SAndroid Build Coastguard Worker    struct rb_node *next = rb_node_next(&interval->physreg_node);
393*61046927SAndroid Build Coastguard Worker    return next ? rb_node_to_interval(next) : NULL;
394*61046927SAndroid Build Coastguard Worker }
395*61046927SAndroid Build Coastguard Worker 
396*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_next_or_null(struct ra_interval * interval)397*61046927SAndroid Build Coastguard Worker ra_interval_next_or_null(struct ra_interval *interval)
398*61046927SAndroid Build Coastguard Worker {
399*61046927SAndroid Build Coastguard Worker    return interval ? ra_interval_next(interval) : NULL;
400*61046927SAndroid Build Coastguard Worker }
401*61046927SAndroid Build Coastguard Worker 
402*61046927SAndroid Build Coastguard Worker static int
ra_interval_cmp(const struct rb_node * node,const void * data)403*61046927SAndroid Build Coastguard Worker ra_interval_cmp(const struct rb_node *node, const void *data)
404*61046927SAndroid Build Coastguard Worker {
405*61046927SAndroid Build Coastguard Worker    physreg_t reg = *(const physreg_t *)data;
406*61046927SAndroid Build Coastguard Worker    const struct ra_interval *interval = rb_node_to_interval_const(node);
407*61046927SAndroid Build Coastguard Worker    if (interval->physreg_start > reg)
408*61046927SAndroid Build Coastguard Worker       return -1;
409*61046927SAndroid Build Coastguard Worker    else if (interval->physreg_end <= reg)
410*61046927SAndroid Build Coastguard Worker       return 1;
411*61046927SAndroid Build Coastguard Worker    else
412*61046927SAndroid Build Coastguard Worker       return 0;
413*61046927SAndroid Build Coastguard Worker }
414*61046927SAndroid Build Coastguard Worker 
415*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_search_sloppy(struct rb_tree * tree,physreg_t reg)416*61046927SAndroid Build Coastguard Worker ra_interval_search_sloppy(struct rb_tree *tree, physreg_t reg)
417*61046927SAndroid Build Coastguard Worker {
418*61046927SAndroid Build Coastguard Worker    struct rb_node *node = rb_tree_search_sloppy(tree, &reg, ra_interval_cmp);
419*61046927SAndroid Build Coastguard Worker    return node ? rb_node_to_interval(node) : NULL;
420*61046927SAndroid Build Coastguard Worker }
421*61046927SAndroid Build Coastguard Worker 
422*61046927SAndroid Build Coastguard Worker /* Get the interval covering the reg, or the closest to the right if it
423*61046927SAndroid Build Coastguard Worker  * doesn't exist.
424*61046927SAndroid Build Coastguard Worker  */
425*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_search_right(struct rb_tree * tree,physreg_t reg)426*61046927SAndroid Build Coastguard Worker ra_interval_search_right(struct rb_tree *tree, physreg_t reg)
427*61046927SAndroid Build Coastguard Worker {
428*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = ra_interval_search_sloppy(tree, reg);
429*61046927SAndroid Build Coastguard Worker    if (!interval) {
430*61046927SAndroid Build Coastguard Worker       return NULL;
431*61046927SAndroid Build Coastguard Worker    } else if (interval->physreg_end > reg) {
432*61046927SAndroid Build Coastguard Worker       return interval;
433*61046927SAndroid Build Coastguard Worker    } else {
434*61046927SAndroid Build Coastguard Worker       /* There is no interval covering reg, and ra_file_search_sloppy()
435*61046927SAndroid Build Coastguard Worker        * returned the closest range to the left, so the next interval to the
436*61046927SAndroid Build Coastguard Worker        * right should be the closest to the right.
437*61046927SAndroid Build Coastguard Worker        */
438*61046927SAndroid Build Coastguard Worker       return ra_interval_next_or_null(interval);
439*61046927SAndroid Build Coastguard Worker    }
440*61046927SAndroid Build Coastguard Worker }
441*61046927SAndroid Build Coastguard Worker 
442*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_file_search_right(struct ra_file * file,physreg_t reg)443*61046927SAndroid Build Coastguard Worker ra_file_search_right(struct ra_file *file, physreg_t reg)
444*61046927SAndroid Build Coastguard Worker {
445*61046927SAndroid Build Coastguard Worker    return ra_interval_search_right(&file->physreg_intervals, reg);
446*61046927SAndroid Build Coastguard Worker }
447*61046927SAndroid Build Coastguard Worker 
448*61046927SAndroid Build Coastguard Worker static int
ra_interval_insert_cmp(const struct rb_node * _a,const struct rb_node * _b)449*61046927SAndroid Build Coastguard Worker ra_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b)
450*61046927SAndroid Build Coastguard Worker {
451*61046927SAndroid Build Coastguard Worker    const struct ra_interval *a = rb_node_to_interval_const(_a);
452*61046927SAndroid Build Coastguard Worker    const struct ra_interval *b = rb_node_to_interval_const(_b);
453*61046927SAndroid Build Coastguard Worker    return b->physreg_start - a->physreg_start;
454*61046927SAndroid Build Coastguard Worker }
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ir3_reg_interval_to_ra_interval(struct ir3_reg_interval * interval)457*61046927SAndroid Build Coastguard Worker ir3_reg_interval_to_ra_interval(struct ir3_reg_interval *interval)
458*61046927SAndroid Build Coastguard Worker {
459*61046927SAndroid Build Coastguard Worker    return rb_node_data(struct ra_interval, interval, interval);
460*61046927SAndroid Build Coastguard Worker }
461*61046927SAndroid Build Coastguard Worker 
462*61046927SAndroid Build Coastguard Worker static struct ra_file *
ir3_reg_ctx_to_file(struct ir3_reg_ctx * ctx)463*61046927SAndroid Build Coastguard Worker ir3_reg_ctx_to_file(struct ir3_reg_ctx *ctx)
464*61046927SAndroid Build Coastguard Worker {
465*61046927SAndroid Build Coastguard Worker    return rb_node_data(struct ra_file, ctx, reg_ctx);
466*61046927SAndroid Build Coastguard Worker }
467*61046927SAndroid Build Coastguard Worker 
468*61046927SAndroid Build Coastguard Worker static void
interval_add(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * _interval)469*61046927SAndroid Build Coastguard Worker interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval)
470*61046927SAndroid Build Coastguard Worker {
471*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval);
472*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ir3_reg_ctx_to_file(ctx);
473*61046927SAndroid Build Coastguard Worker 
474*61046927SAndroid Build Coastguard Worker    /* We can assume in this case that physreg_start/physreg_end is already
475*61046927SAndroid Build Coastguard Worker     * initialized.
476*61046927SAndroid Build Coastguard Worker     */
477*61046927SAndroid Build Coastguard Worker    for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
478*61046927SAndroid Build Coastguard Worker       BITSET_CLEAR(file->available, i);
479*61046927SAndroid Build Coastguard Worker       BITSET_CLEAR(file->available_to_evict, i);
480*61046927SAndroid Build Coastguard Worker    }
481*61046927SAndroid Build Coastguard Worker 
482*61046927SAndroid Build Coastguard Worker    rb_tree_insert(&file->physreg_intervals, &interval->physreg_node,
483*61046927SAndroid Build Coastguard Worker                   ra_interval_insert_cmp);
484*61046927SAndroid Build Coastguard Worker }
485*61046927SAndroid Build Coastguard Worker 
486*61046927SAndroid Build Coastguard Worker static void
interval_delete(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * _interval)487*61046927SAndroid Build Coastguard Worker interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval)
488*61046927SAndroid Build Coastguard Worker {
489*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval);
490*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ir3_reg_ctx_to_file(ctx);
491*61046927SAndroid Build Coastguard Worker 
492*61046927SAndroid Build Coastguard Worker    for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
493*61046927SAndroid Build Coastguard Worker       BITSET_SET(file->available, i);
494*61046927SAndroid Build Coastguard Worker       BITSET_SET(file->available_to_evict, i);
495*61046927SAndroid Build Coastguard Worker    }
496*61046927SAndroid Build Coastguard Worker 
497*61046927SAndroid Build Coastguard Worker    rb_tree_remove(&file->physreg_intervals, &interval->physreg_node);
498*61046927SAndroid Build Coastguard Worker }
499*61046927SAndroid Build Coastguard Worker 
500*61046927SAndroid Build Coastguard Worker static void
interval_readd(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * _parent,struct ir3_reg_interval * _child)501*61046927SAndroid Build Coastguard Worker interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_parent,
502*61046927SAndroid Build Coastguard Worker                struct ir3_reg_interval *_child)
503*61046927SAndroid Build Coastguard Worker {
504*61046927SAndroid Build Coastguard Worker    struct ra_interval *parent = ir3_reg_interval_to_ra_interval(_parent);
505*61046927SAndroid Build Coastguard Worker    struct ra_interval *child = ir3_reg_interval_to_ra_interval(_child);
506*61046927SAndroid Build Coastguard Worker 
507*61046927SAndroid Build Coastguard Worker    child->physreg_start =
508*61046927SAndroid Build Coastguard Worker       parent->physreg_start + (child->interval.reg->interval_start -
509*61046927SAndroid Build Coastguard Worker                                parent->interval.reg->interval_start);
510*61046927SAndroid Build Coastguard Worker    child->physreg_end =
511*61046927SAndroid Build Coastguard Worker       child->physreg_start +
512*61046927SAndroid Build Coastguard Worker       (child->interval.reg->interval_end - child->interval.reg->interval_start);
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker    interval_add(ctx, _child);
515*61046927SAndroid Build Coastguard Worker }
516*61046927SAndroid Build Coastguard Worker 
517*61046927SAndroid Build Coastguard Worker static void
ra_file_init(struct ra_file * file)518*61046927SAndroid Build Coastguard Worker ra_file_init(struct ra_file *file)
519*61046927SAndroid Build Coastguard Worker {
520*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < file->size; i++) {
521*61046927SAndroid Build Coastguard Worker       BITSET_SET(file->available, i);
522*61046927SAndroid Build Coastguard Worker       BITSET_SET(file->available_to_evict, i);
523*61046927SAndroid Build Coastguard Worker    }
524*61046927SAndroid Build Coastguard Worker 
525*61046927SAndroid Build Coastguard Worker    rb_tree_init(&file->reg_ctx.intervals);
526*61046927SAndroid Build Coastguard Worker    rb_tree_init(&file->physreg_intervals);
527*61046927SAndroid Build Coastguard Worker 
528*61046927SAndroid Build Coastguard Worker    file->reg_ctx.interval_add = interval_add;
529*61046927SAndroid Build Coastguard Worker    file->reg_ctx.interval_delete = interval_delete;
530*61046927SAndroid Build Coastguard Worker    file->reg_ctx.interval_readd = interval_readd;
531*61046927SAndroid Build Coastguard Worker }
532*61046927SAndroid Build Coastguard Worker 
533*61046927SAndroid Build Coastguard Worker static void
ra_file_insert(struct ra_file * file,struct ra_interval * interval)534*61046927SAndroid Build Coastguard Worker ra_file_insert(struct ra_file *file, struct ra_interval *interval)
535*61046927SAndroid Build Coastguard Worker {
536*61046927SAndroid Build Coastguard Worker    assert(interval->physreg_start < interval->physreg_end);
537*61046927SAndroid Build Coastguard Worker    assert(interval->physreg_end <= file->size);
538*61046927SAndroid Build Coastguard Worker    if (interval->interval.reg->flags & IR3_REG_HALF)
539*61046927SAndroid Build Coastguard Worker       assert(interval->physreg_end <= RA_HALF_SIZE);
540*61046927SAndroid Build Coastguard Worker 
541*61046927SAndroid Build Coastguard Worker    ir3_reg_interval_insert(&file->reg_ctx, &interval->interval);
542*61046927SAndroid Build Coastguard Worker }
543*61046927SAndroid Build Coastguard Worker 
544*61046927SAndroid Build Coastguard Worker static void
ra_file_remove(struct ra_file * file,struct ra_interval * interval)545*61046927SAndroid Build Coastguard Worker ra_file_remove(struct ra_file *file, struct ra_interval *interval)
546*61046927SAndroid Build Coastguard Worker {
547*61046927SAndroid Build Coastguard Worker    ir3_reg_interval_remove(&file->reg_ctx, &interval->interval);
548*61046927SAndroid Build Coastguard Worker }
549*61046927SAndroid Build Coastguard Worker 
550*61046927SAndroid Build Coastguard Worker static void
ra_file_mark_killed(struct ra_file * file,struct ra_interval * interval)551*61046927SAndroid Build Coastguard Worker ra_file_mark_killed(struct ra_file *file, struct ra_interval *interval)
552*61046927SAndroid Build Coastguard Worker {
553*61046927SAndroid Build Coastguard Worker    assert(!interval->interval.parent);
554*61046927SAndroid Build Coastguard Worker 
555*61046927SAndroid Build Coastguard Worker    for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
556*61046927SAndroid Build Coastguard Worker       BITSET_SET(file->available, i);
557*61046927SAndroid Build Coastguard Worker    }
558*61046927SAndroid Build Coastguard Worker 
559*61046927SAndroid Build Coastguard Worker    interval->is_killed = true;
560*61046927SAndroid Build Coastguard Worker }
561*61046927SAndroid Build Coastguard Worker 
562*61046927SAndroid Build Coastguard Worker static void
ra_file_unmark_killed(struct ra_file * file,struct ra_interval * interval)563*61046927SAndroid Build Coastguard Worker ra_file_unmark_killed(struct ra_file *file, struct ra_interval *interval)
564*61046927SAndroid Build Coastguard Worker {
565*61046927SAndroid Build Coastguard Worker    assert(!interval->interval.parent);
566*61046927SAndroid Build Coastguard Worker 
567*61046927SAndroid Build Coastguard Worker    for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
568*61046927SAndroid Build Coastguard Worker       BITSET_CLEAR(file->available, i);
569*61046927SAndroid Build Coastguard Worker    }
570*61046927SAndroid Build Coastguard Worker 
571*61046927SAndroid Build Coastguard Worker    interval->is_killed = false;
572*61046927SAndroid Build Coastguard Worker }
573*61046927SAndroid Build Coastguard Worker 
574*61046927SAndroid Build Coastguard Worker static physreg_t
ra_interval_get_physreg(const struct ra_interval * interval)575*61046927SAndroid Build Coastguard Worker ra_interval_get_physreg(const struct ra_interval *interval)
576*61046927SAndroid Build Coastguard Worker {
577*61046927SAndroid Build Coastguard Worker    unsigned child_start = interval->interval.reg->interval_start;
578*61046927SAndroid Build Coastguard Worker 
579*61046927SAndroid Build Coastguard Worker    while (interval->interval.parent) {
580*61046927SAndroid Build Coastguard Worker       interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
581*61046927SAndroid Build Coastguard Worker    }
582*61046927SAndroid Build Coastguard Worker 
583*61046927SAndroid Build Coastguard Worker    return interval->physreg_start +
584*61046927SAndroid Build Coastguard Worker           (child_start - interval->interval.reg->interval_start);
585*61046927SAndroid Build Coastguard Worker }
586*61046927SAndroid Build Coastguard Worker 
587*61046927SAndroid Build Coastguard Worker static unsigned
ra_interval_get_num(const struct ra_interval * interval)588*61046927SAndroid Build Coastguard Worker ra_interval_get_num(const struct ra_interval *interval)
589*61046927SAndroid Build Coastguard Worker {
590*61046927SAndroid Build Coastguard Worker    return ra_physreg_to_num(ra_interval_get_physreg(interval),
591*61046927SAndroid Build Coastguard Worker                             interval->interval.reg->flags);
592*61046927SAndroid Build Coastguard Worker }
593*61046927SAndroid Build Coastguard Worker 
594*61046927SAndroid Build Coastguard Worker static void
ra_interval_init(struct ra_interval * interval,struct ir3_register * reg)595*61046927SAndroid Build Coastguard Worker ra_interval_init(struct ra_interval *interval, struct ir3_register *reg)
596*61046927SAndroid Build Coastguard Worker {
597*61046927SAndroid Build Coastguard Worker    ir3_reg_interval_init(&interval->interval, reg);
598*61046927SAndroid Build Coastguard Worker    interval->is_killed = false;
599*61046927SAndroid Build Coastguard Worker    interval->frozen = false;
600*61046927SAndroid Build Coastguard Worker }
601*61046927SAndroid Build Coastguard Worker 
602*61046927SAndroid Build Coastguard Worker static void
ra_interval_dump(struct log_stream * stream,struct ra_interval * interval)603*61046927SAndroid Build Coastguard Worker ra_interval_dump(struct log_stream *stream, struct ra_interval *interval)
604*61046927SAndroid Build Coastguard Worker {
605*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "physreg %u ", interval->physreg_start);
606*61046927SAndroid Build Coastguard Worker 
607*61046927SAndroid Build Coastguard Worker    ir3_reg_interval_dump(stream, &interval->interval);
608*61046927SAndroid Build Coastguard Worker }
609*61046927SAndroid Build Coastguard Worker 
610*61046927SAndroid Build Coastguard Worker static void
ra_file_dump(struct log_stream * stream,struct ra_file * file)611*61046927SAndroid Build Coastguard Worker ra_file_dump(struct log_stream *stream, struct ra_file *file)
612*61046927SAndroid Build Coastguard Worker {
613*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
614*61046927SAndroid Build Coastguard Worker                     physreg_node) {
615*61046927SAndroid Build Coastguard Worker       ra_interval_dump(stream, interval);
616*61046927SAndroid Build Coastguard Worker    }
617*61046927SAndroid Build Coastguard Worker 
618*61046927SAndroid Build Coastguard Worker    unsigned start, end;
619*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "available:\n");
620*61046927SAndroid Build Coastguard Worker    BITSET_FOREACH_RANGE (start, end, file->available, file->size) {
621*61046927SAndroid Build Coastguard Worker       mesa_log_stream_printf(stream, "%u-%u ", start, end);
622*61046927SAndroid Build Coastguard Worker    }
623*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "\n");
624*61046927SAndroid Build Coastguard Worker 
625*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "available to evict:\n");
626*61046927SAndroid Build Coastguard Worker    BITSET_FOREACH_RANGE (start, end, file->available_to_evict, file->size) {
627*61046927SAndroid Build Coastguard Worker       mesa_log_stream_printf(stream, "%u-%u ", start, end);
628*61046927SAndroid Build Coastguard Worker    }
629*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "\n");
630*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "start: %u\n", file->start);
631*61046927SAndroid Build Coastguard Worker }
632*61046927SAndroid Build Coastguard Worker 
633*61046927SAndroid Build Coastguard Worker static void
ra_ctx_dump(struct ra_ctx * ctx)634*61046927SAndroid Build Coastguard Worker ra_ctx_dump(struct ra_ctx *ctx)
635*61046927SAndroid Build Coastguard Worker {
636*61046927SAndroid Build Coastguard Worker    struct log_stream *stream = mesa_log_streami();
637*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "full:\n");
638*61046927SAndroid Build Coastguard Worker    ra_file_dump(stream, &ctx->full);
639*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "half:\n");
640*61046927SAndroid Build Coastguard Worker    ra_file_dump(stream, &ctx->half);
641*61046927SAndroid Build Coastguard Worker    mesa_log_stream_printf(stream, "shared:\n");
642*61046927SAndroid Build Coastguard Worker    ra_file_dump(stream, &ctx->shared);
643*61046927SAndroid Build Coastguard Worker    mesa_log_stream_destroy(stream);
644*61046927SAndroid Build Coastguard Worker }
645*61046927SAndroid Build Coastguard Worker 
646*61046927SAndroid Build Coastguard Worker static unsigned
reg_file_size(struct ra_file * file,struct ir3_register * reg)647*61046927SAndroid Build Coastguard Worker reg_file_size(struct ra_file *file, struct ir3_register *reg)
648*61046927SAndroid Build Coastguard Worker {
649*61046927SAndroid Build Coastguard Worker    /* Half-regs can only take up the first half of the combined regfile */
650*61046927SAndroid Build Coastguard Worker    if (reg->flags & IR3_REG_HALF) {
651*61046927SAndroid Build Coastguard Worker       if (reg->flags & IR3_REG_SHARED)
652*61046927SAndroid Build Coastguard Worker          return RA_SHARED_HALF_SIZE;
653*61046927SAndroid Build Coastguard Worker       else
654*61046927SAndroid Build Coastguard Worker          return MIN2(file->size, RA_HALF_SIZE);
655*61046927SAndroid Build Coastguard Worker    } else {
656*61046927SAndroid Build Coastguard Worker       return file->size;
657*61046927SAndroid Build Coastguard Worker    }
658*61046927SAndroid Build Coastguard Worker }
659*61046927SAndroid Build Coastguard Worker 
660*61046927SAndroid Build Coastguard Worker /* ra_pop_interval/ra_push_interval provide an API to shuffle around multiple
661*61046927SAndroid Build Coastguard Worker  * top-level intervals at once. Pop multiple intervals, then push them back in
662*61046927SAndroid Build Coastguard Worker  * any order.
663*61046927SAndroid Build Coastguard Worker  */
664*61046927SAndroid Build Coastguard Worker 
665*61046927SAndroid Build Coastguard Worker struct ra_removed_interval {
666*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval;
667*61046927SAndroid Build Coastguard Worker    unsigned size;
668*61046927SAndroid Build Coastguard Worker };
669*61046927SAndroid Build Coastguard Worker 
670*61046927SAndroid Build Coastguard Worker static struct ra_removed_interval
ra_pop_interval(struct ra_ctx * ctx,struct ra_file * file,struct ra_interval * interval)671*61046927SAndroid Build Coastguard Worker ra_pop_interval(struct ra_ctx *ctx, struct ra_file *file,
672*61046927SAndroid Build Coastguard Worker                 struct ra_interval *interval)
673*61046927SAndroid Build Coastguard Worker {
674*61046927SAndroid Build Coastguard Worker    assert(!interval->interval.parent);
675*61046927SAndroid Build Coastguard Worker    /* shared live splitting is not allowed! */
676*61046927SAndroid Build Coastguard Worker    assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
677*61046927SAndroid Build Coastguard Worker 
678*61046927SAndroid Build Coastguard Worker    /* Check if we've already moved this reg before */
679*61046927SAndroid Build Coastguard Worker    unsigned pcopy_index;
680*61046927SAndroid Build Coastguard Worker    for (pcopy_index = 0; pcopy_index < ctx->parallel_copies_count;
681*61046927SAndroid Build Coastguard Worker         pcopy_index++) {
682*61046927SAndroid Build Coastguard Worker       if (ctx->parallel_copies[pcopy_index].interval == interval)
683*61046927SAndroid Build Coastguard Worker          break;
684*61046927SAndroid Build Coastguard Worker    }
685*61046927SAndroid Build Coastguard Worker 
686*61046927SAndroid Build Coastguard Worker    if (pcopy_index == ctx->parallel_copies_count) {
687*61046927SAndroid Build Coastguard Worker       array_insert(ctx, ctx->parallel_copies,
688*61046927SAndroid Build Coastguard Worker                    (struct ra_parallel_copy){
689*61046927SAndroid Build Coastguard Worker                       .interval = interval,
690*61046927SAndroid Build Coastguard Worker                       .src = interval->physreg_start,
691*61046927SAndroid Build Coastguard Worker                    });
692*61046927SAndroid Build Coastguard Worker    }
693*61046927SAndroid Build Coastguard Worker 
694*61046927SAndroid Build Coastguard Worker    ir3_reg_interval_remove_temp(&file->reg_ctx, &interval->interval);
695*61046927SAndroid Build Coastguard Worker 
696*61046927SAndroid Build Coastguard Worker    return (struct ra_removed_interval){
697*61046927SAndroid Build Coastguard Worker       .interval = interval,
698*61046927SAndroid Build Coastguard Worker       .size = interval->physreg_end - interval->physreg_start,
699*61046927SAndroid Build Coastguard Worker    };
700*61046927SAndroid Build Coastguard Worker }
701*61046927SAndroid Build Coastguard Worker 
702*61046927SAndroid Build Coastguard Worker static void
ra_push_interval(struct ra_ctx * ctx,struct ra_file * file,const struct ra_removed_interval * removed,physreg_t dst)703*61046927SAndroid Build Coastguard Worker ra_push_interval(struct ra_ctx *ctx, struct ra_file *file,
704*61046927SAndroid Build Coastguard Worker                  const struct ra_removed_interval *removed, physreg_t dst)
705*61046927SAndroid Build Coastguard Worker {
706*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = removed->interval;
707*61046927SAndroid Build Coastguard Worker 
708*61046927SAndroid Build Coastguard Worker    interval->physreg_start = dst;
709*61046927SAndroid Build Coastguard Worker    interval->physreg_end = dst + removed->size;
710*61046927SAndroid Build Coastguard Worker 
711*61046927SAndroid Build Coastguard Worker    assert(interval->physreg_end <= file->size);
712*61046927SAndroid Build Coastguard Worker    if (interval->interval.reg->flags & IR3_REG_HALF)
713*61046927SAndroid Build Coastguard Worker       assert(interval->physreg_end <= RA_HALF_SIZE);
714*61046927SAndroid Build Coastguard Worker 
715*61046927SAndroid Build Coastguard Worker    ir3_reg_interval_reinsert(&file->reg_ctx, &interval->interval);
716*61046927SAndroid Build Coastguard Worker }
717*61046927SAndroid Build Coastguard Worker 
718*61046927SAndroid Build Coastguard Worker /* Pick up the interval and place it at "dst". */
719*61046927SAndroid Build Coastguard Worker static void
ra_move_interval(struct ra_ctx * ctx,struct ra_file * file,struct ra_interval * interval,physreg_t dst)720*61046927SAndroid Build Coastguard Worker ra_move_interval(struct ra_ctx *ctx, struct ra_file *file,
721*61046927SAndroid Build Coastguard Worker                  struct ra_interval *interval, physreg_t dst)
722*61046927SAndroid Build Coastguard Worker {
723*61046927SAndroid Build Coastguard Worker    struct ra_removed_interval temp = ra_pop_interval(ctx, file, interval);
724*61046927SAndroid Build Coastguard Worker    ra_push_interval(ctx, file, &temp, dst);
725*61046927SAndroid Build Coastguard Worker }
726*61046927SAndroid Build Coastguard Worker 
727*61046927SAndroid Build Coastguard Worker static struct ra_file *
ra_get_file(struct ra_ctx * ctx,struct ir3_register * reg)728*61046927SAndroid Build Coastguard Worker ra_get_file(struct ra_ctx *ctx, struct ir3_register *reg)
729*61046927SAndroid Build Coastguard Worker {
730*61046927SAndroid Build Coastguard Worker    if (reg->flags & IR3_REG_SHARED)
731*61046927SAndroid Build Coastguard Worker       return &ctx->shared;
732*61046927SAndroid Build Coastguard Worker    else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
733*61046927SAndroid Build Coastguard Worker       return &ctx->full;
734*61046927SAndroid Build Coastguard Worker    else
735*61046927SAndroid Build Coastguard Worker       return &ctx->half;
736*61046927SAndroid Build Coastguard Worker }
737*61046927SAndroid Build Coastguard Worker 
738*61046927SAndroid Build Coastguard Worker 
739*61046927SAndroid Build Coastguard Worker /* Returns true if the proposed spot for "dst" or a killed source overlaps a
740*61046927SAndroid Build Coastguard Worker  * destination that's been allocated.
741*61046927SAndroid Build Coastguard Worker  */
742*61046927SAndroid Build Coastguard Worker static bool
check_dst_overlap(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * dst,physreg_t start,physreg_t end)743*61046927SAndroid Build Coastguard Worker check_dst_overlap(struct ra_ctx *ctx, struct ra_file *file,
744*61046927SAndroid Build Coastguard Worker                   struct ir3_register *dst, physreg_t start,
745*61046927SAndroid Build Coastguard Worker                   physreg_t end)
746*61046927SAndroid Build Coastguard Worker {
747*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *instr = dst->instr;
748*61046927SAndroid Build Coastguard Worker 
749*61046927SAndroid Build Coastguard Worker    ra_foreach_dst (other_dst, instr) {
750*61046927SAndroid Build Coastguard Worker       /* We assume only destinations before the current one have been allocated.
751*61046927SAndroid Build Coastguard Worker        */
752*61046927SAndroid Build Coastguard Worker       if (other_dst == dst)
753*61046927SAndroid Build Coastguard Worker          break;
754*61046927SAndroid Build Coastguard Worker 
755*61046927SAndroid Build Coastguard Worker       if (ra_get_file(ctx, other_dst) != file)
756*61046927SAndroid Build Coastguard Worker          continue;
757*61046927SAndroid Build Coastguard Worker 
758*61046927SAndroid Build Coastguard Worker       struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
759*61046927SAndroid Build Coastguard Worker       assert(!other_interval->interval.parent);
760*61046927SAndroid Build Coastguard Worker       physreg_t other_start = other_interval->physreg_start;
761*61046927SAndroid Build Coastguard Worker       physreg_t other_end = other_interval->physreg_end;
762*61046927SAndroid Build Coastguard Worker 
763*61046927SAndroid Build Coastguard Worker       if (other_end > start && end > other_start)
764*61046927SAndroid Build Coastguard Worker          return true;
765*61046927SAndroid Build Coastguard Worker    }
766*61046927SAndroid Build Coastguard Worker 
767*61046927SAndroid Build Coastguard Worker    return false;
768*61046927SAndroid Build Coastguard Worker }
769*61046927SAndroid Build Coastguard Worker 
770*61046927SAndroid Build Coastguard Worker /* True if the destination is "early-clobber," meaning that it cannot be
771*61046927SAndroid Build Coastguard Worker  * allocated over killed sources. Some destinations always require it, but it
772*61046927SAndroid Build Coastguard Worker  * also is implicitly true for tied destinations whose source is live-through.
773*61046927SAndroid Build Coastguard Worker  * If the source is killed, then we skip allocating a register for the
774*61046927SAndroid Build Coastguard Worker  * destination altogether so we don't need to worry about that case here.
775*61046927SAndroid Build Coastguard Worker  */
776*61046927SAndroid Build Coastguard Worker static bool
is_early_clobber(struct ir3_register * reg)777*61046927SAndroid Build Coastguard Worker is_early_clobber(struct ir3_register *reg)
778*61046927SAndroid Build Coastguard Worker {
779*61046927SAndroid Build Coastguard Worker    return (reg->flags & IR3_REG_EARLY_CLOBBER) || reg->tied;
780*61046927SAndroid Build Coastguard Worker }
781*61046927SAndroid Build Coastguard Worker 
782*61046927SAndroid Build Coastguard Worker static bool
get_reg_specified(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg,physreg_t physreg,bool is_source)783*61046927SAndroid Build Coastguard Worker get_reg_specified(struct ra_ctx *ctx, struct ra_file *file,
784*61046927SAndroid Build Coastguard Worker                   struct ir3_register *reg, physreg_t physreg, bool is_source)
785*61046927SAndroid Build Coastguard Worker {
786*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < reg_size(reg); i++) {
787*61046927SAndroid Build Coastguard Worker       if (!BITSET_TEST(is_early_clobber(reg) || is_source ?
788*61046927SAndroid Build Coastguard Worker                            file->available_to_evict : file->available,
789*61046927SAndroid Build Coastguard Worker                        physreg + i))
790*61046927SAndroid Build Coastguard Worker          return false;
791*61046927SAndroid Build Coastguard Worker    }
792*61046927SAndroid Build Coastguard Worker 
793*61046927SAndroid Build Coastguard Worker    if (!is_source &&
794*61046927SAndroid Build Coastguard Worker        check_dst_overlap(ctx, file, reg, physreg, physreg + reg_size(reg)))
795*61046927SAndroid Build Coastguard Worker       return false;
796*61046927SAndroid Build Coastguard Worker 
797*61046927SAndroid Build Coastguard Worker    return true;
798*61046927SAndroid Build Coastguard Worker }
799*61046927SAndroid Build Coastguard Worker 
800*61046927SAndroid Build Coastguard Worker /* Try to evict any registers conflicting with the proposed spot "physreg" for
801*61046927SAndroid Build Coastguard Worker  * "reg". That is, move them to other places so that we can allocate "physreg"
802*61046927SAndroid Build Coastguard Worker  * here.
803*61046927SAndroid Build Coastguard Worker  */
804*61046927SAndroid Build Coastguard Worker 
805*61046927SAndroid Build Coastguard Worker static bool
try_evict_regs(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg,physreg_t physreg,unsigned * _eviction_count,bool is_source,bool speculative)806*61046927SAndroid Build Coastguard Worker try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
807*61046927SAndroid Build Coastguard Worker                struct ir3_register *reg, physreg_t physreg,
808*61046927SAndroid Build Coastguard Worker                unsigned *_eviction_count, bool is_source, bool speculative)
809*61046927SAndroid Build Coastguard Worker {
810*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE);
811*61046927SAndroid Build Coastguard Worker    memcpy(available_to_evict, file->available_to_evict,
812*61046927SAndroid Build Coastguard Worker           sizeof(available_to_evict));
813*61046927SAndroid Build Coastguard Worker 
814*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
815*61046927SAndroid Build Coastguard Worker    memcpy(available, file->available, sizeof(available));
816*61046927SAndroid Build Coastguard Worker 
817*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < reg_size(reg); i++) {
818*61046927SAndroid Build Coastguard Worker       BITSET_CLEAR(available_to_evict, physreg + i);
819*61046927SAndroid Build Coastguard Worker       BITSET_CLEAR(available, physreg + i);
820*61046927SAndroid Build Coastguard Worker    }
821*61046927SAndroid Build Coastguard Worker 
822*61046927SAndroid Build Coastguard Worker    unsigned eviction_count = 0;
823*61046927SAndroid Build Coastguard Worker    /* Iterate over each range conflicting with physreg */
824*61046927SAndroid Build Coastguard Worker    for (struct ra_interval *conflicting = ra_file_search_right(file, physreg),
825*61046927SAndroid Build Coastguard Worker                            *next = ra_interval_next_or_null(conflicting);
826*61046927SAndroid Build Coastguard Worker         conflicting != NULL &&
827*61046927SAndroid Build Coastguard Worker         conflicting->physreg_start < physreg + reg_size(reg);
828*61046927SAndroid Build Coastguard Worker         conflicting = next, next = ra_interval_next_or_null(next)) {
829*61046927SAndroid Build Coastguard Worker       if (!is_early_clobber(reg) && !is_source && conflicting->is_killed)
830*61046927SAndroid Build Coastguard Worker          continue;
831*61046927SAndroid Build Coastguard Worker 
832*61046927SAndroid Build Coastguard Worker       if (conflicting->frozen) {
833*61046927SAndroid Build Coastguard Worker          assert(speculative);
834*61046927SAndroid Build Coastguard Worker          return false;
835*61046927SAndroid Build Coastguard Worker       }
836*61046927SAndroid Build Coastguard Worker 
837*61046927SAndroid Build Coastguard Worker       unsigned conflicting_file_size =
838*61046927SAndroid Build Coastguard Worker          reg_file_size(file, conflicting->interval.reg);
839*61046927SAndroid Build Coastguard Worker       unsigned avail_start, avail_end;
840*61046927SAndroid Build Coastguard Worker       bool evicted = false;
841*61046927SAndroid Build Coastguard Worker       BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict,
842*61046927SAndroid Build Coastguard Worker                             conflicting_file_size) {
843*61046927SAndroid Build Coastguard Worker          unsigned size = avail_end - avail_start;
844*61046927SAndroid Build Coastguard Worker 
845*61046927SAndroid Build Coastguard Worker          /* non-half registers must be aligned */
846*61046927SAndroid Build Coastguard Worker          if (!(conflicting->interval.reg->flags & IR3_REG_HALF) &&
847*61046927SAndroid Build Coastguard Worker              avail_start % 2 == 1) {
848*61046927SAndroid Build Coastguard Worker             avail_start++;
849*61046927SAndroid Build Coastguard Worker             size--;
850*61046927SAndroid Build Coastguard Worker          }
851*61046927SAndroid Build Coastguard Worker 
852*61046927SAndroid Build Coastguard Worker          unsigned conflicting_size =
853*61046927SAndroid Build Coastguard Worker             conflicting->physreg_end - conflicting->physreg_start;
854*61046927SAndroid Build Coastguard Worker          if (size >= conflicting_size &&
855*61046927SAndroid Build Coastguard Worker              (is_source ||
856*61046927SAndroid Build Coastguard Worker               !check_dst_overlap(ctx, file, reg, avail_start,
857*61046927SAndroid Build Coastguard Worker                                  avail_start + conflicting_size))) {
858*61046927SAndroid Build Coastguard Worker             for (unsigned i = 0;
859*61046927SAndroid Build Coastguard Worker                  i < conflicting->physreg_end - conflicting->physreg_start; i++)
860*61046927SAndroid Build Coastguard Worker                BITSET_CLEAR(available_to_evict, avail_start + i);
861*61046927SAndroid Build Coastguard Worker             eviction_count +=
862*61046927SAndroid Build Coastguard Worker                conflicting->physreg_end - conflicting->physreg_start;
863*61046927SAndroid Build Coastguard Worker             if (!speculative)
864*61046927SAndroid Build Coastguard Worker                ra_move_interval(ctx, file, conflicting, avail_start);
865*61046927SAndroid Build Coastguard Worker             evicted = true;
866*61046927SAndroid Build Coastguard Worker             break;
867*61046927SAndroid Build Coastguard Worker          }
868*61046927SAndroid Build Coastguard Worker       }
869*61046927SAndroid Build Coastguard Worker 
870*61046927SAndroid Build Coastguard Worker       if (evicted)
871*61046927SAndroid Build Coastguard Worker          continue;
872*61046927SAndroid Build Coastguard Worker 
873*61046927SAndroid Build Coastguard Worker       /* If we couldn't evict this range, but the register we're allocating is
874*61046927SAndroid Build Coastguard Worker        * allowed to overlap with a killed range, then we may be able to swap it
875*61046927SAndroid Build Coastguard Worker        * with a killed range to acheive the same effect.
876*61046927SAndroid Build Coastguard Worker        */
877*61046927SAndroid Build Coastguard Worker       if (is_early_clobber(reg) || is_source)
878*61046927SAndroid Build Coastguard Worker          return false;
879*61046927SAndroid Build Coastguard Worker 
880*61046927SAndroid Build Coastguard Worker       foreach_interval (killed, file) {
881*61046927SAndroid Build Coastguard Worker          if (!killed->is_killed)
882*61046927SAndroid Build Coastguard Worker             continue;
883*61046927SAndroid Build Coastguard Worker 
884*61046927SAndroid Build Coastguard Worker          if (killed->physreg_end - killed->physreg_start !=
885*61046927SAndroid Build Coastguard Worker              conflicting->physreg_end - conflicting->physreg_start)
886*61046927SAndroid Build Coastguard Worker             continue;
887*61046927SAndroid Build Coastguard Worker 
888*61046927SAndroid Build Coastguard Worker          if (killed->physreg_end > conflicting_file_size ||
889*61046927SAndroid Build Coastguard Worker              conflicting->physreg_end > reg_file_size(file, killed->interval.reg))
890*61046927SAndroid Build Coastguard Worker             continue;
891*61046927SAndroid Build Coastguard Worker 
892*61046927SAndroid Build Coastguard Worker          /* We can't swap the killed range if it partially/fully overlaps the
893*61046927SAndroid Build Coastguard Worker           * space we're trying to allocate or (in speculative mode) if it's
894*61046927SAndroid Build Coastguard Worker           * already been swapped and will overlap when we actually evict.
895*61046927SAndroid Build Coastguard Worker           */
896*61046927SAndroid Build Coastguard Worker          bool killed_available = true;
897*61046927SAndroid Build Coastguard Worker          for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
898*61046927SAndroid Build Coastguard Worker             if (!BITSET_TEST(available, i)) {
899*61046927SAndroid Build Coastguard Worker                killed_available = false;
900*61046927SAndroid Build Coastguard Worker                break;
901*61046927SAndroid Build Coastguard Worker             }
902*61046927SAndroid Build Coastguard Worker          }
903*61046927SAndroid Build Coastguard Worker 
904*61046927SAndroid Build Coastguard Worker          if (!killed_available)
905*61046927SAndroid Build Coastguard Worker             continue;
906*61046927SAndroid Build Coastguard Worker 
907*61046927SAndroid Build Coastguard Worker          if (check_dst_overlap(ctx, file, reg, killed->physreg_start,
908*61046927SAndroid Build Coastguard Worker                                killed->physreg_end))
909*61046927SAndroid Build Coastguard Worker             continue;
910*61046927SAndroid Build Coastguard Worker 
911*61046927SAndroid Build Coastguard Worker          /* Check for alignment if one is a full reg */
912*61046927SAndroid Build Coastguard Worker          if ((!(killed->interval.reg->flags & IR3_REG_HALF) ||
913*61046927SAndroid Build Coastguard Worker               !(conflicting->interval.reg->flags & IR3_REG_HALF)) &&
914*61046927SAndroid Build Coastguard Worker              (killed->physreg_start % 2 != 0 ||
915*61046927SAndroid Build Coastguard Worker               conflicting->physreg_start % 2 != 0))
916*61046927SAndroid Build Coastguard Worker             continue;
917*61046927SAndroid Build Coastguard Worker 
918*61046927SAndroid Build Coastguard Worker          for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
919*61046927SAndroid Build Coastguard Worker             BITSET_CLEAR(available, i);
920*61046927SAndroid Build Coastguard Worker          }
921*61046927SAndroid Build Coastguard Worker          /* Because this will generate swaps instead of moves, multiply the
922*61046927SAndroid Build Coastguard Worker           * cost by 2.
923*61046927SAndroid Build Coastguard Worker           */
924*61046927SAndroid Build Coastguard Worker          eviction_count += (killed->physreg_end - killed->physreg_start) * 2;
925*61046927SAndroid Build Coastguard Worker          if (!speculative) {
926*61046927SAndroid Build Coastguard Worker             physreg_t killed_start = killed->physreg_start,
927*61046927SAndroid Build Coastguard Worker                       conflicting_start = conflicting->physreg_start;
928*61046927SAndroid Build Coastguard Worker             struct ra_removed_interval killed_removed =
929*61046927SAndroid Build Coastguard Worker                ra_pop_interval(ctx, file, killed);
930*61046927SAndroid Build Coastguard Worker             struct ra_removed_interval conflicting_removed =
931*61046927SAndroid Build Coastguard Worker                ra_pop_interval(ctx, file, conflicting);
932*61046927SAndroid Build Coastguard Worker             ra_push_interval(ctx, file, &killed_removed, conflicting_start);
933*61046927SAndroid Build Coastguard Worker             ra_push_interval(ctx, file, &conflicting_removed, killed_start);
934*61046927SAndroid Build Coastguard Worker          }
935*61046927SAndroid Build Coastguard Worker 
936*61046927SAndroid Build Coastguard Worker          evicted = true;
937*61046927SAndroid Build Coastguard Worker          break;
938*61046927SAndroid Build Coastguard Worker       }
939*61046927SAndroid Build Coastguard Worker 
940*61046927SAndroid Build Coastguard Worker       if (!evicted)
941*61046927SAndroid Build Coastguard Worker          return false;
942*61046927SAndroid Build Coastguard Worker    }
943*61046927SAndroid Build Coastguard Worker 
944*61046927SAndroid Build Coastguard Worker    *_eviction_count = eviction_count;
945*61046927SAndroid Build Coastguard Worker    return true;
946*61046927SAndroid Build Coastguard Worker }
947*61046927SAndroid Build Coastguard Worker 
948*61046927SAndroid Build Coastguard Worker static int
removed_interval_cmp(const void * _i1,const void * _i2)949*61046927SAndroid Build Coastguard Worker removed_interval_cmp(const void *_i1, const void *_i2)
950*61046927SAndroid Build Coastguard Worker {
951*61046927SAndroid Build Coastguard Worker    const struct ra_removed_interval *i1 = _i1;
952*61046927SAndroid Build Coastguard Worker    const struct ra_removed_interval *i2 = _i2;
953*61046927SAndroid Build Coastguard Worker 
954*61046927SAndroid Build Coastguard Worker    /* We sort the registers as follows:
955*61046927SAndroid Build Coastguard Worker     *
956*61046927SAndroid Build Coastguard Worker     * |------------------------------------------------------------------------------------------|
957*61046927SAndroid Build Coastguard Worker     * |               |                    |        |        |                    |              |
958*61046927SAndroid Build Coastguard Worker     * |  Half         | Half early-clobber | Half   | Full   | Full early-clobber | Full         |
959*61046927SAndroid Build Coastguard Worker     * |  live-through | destination        | killed | killed | destination        | live-through |
960*61046927SAndroid Build Coastguard Worker     * |               |                    |        |        |                    |              |
961*61046927SAndroid Build Coastguard Worker     * |------------------------------------------------------------------------------------------|
962*61046927SAndroid Build Coastguard Worker     *                                      |                 |
963*61046927SAndroid Build Coastguard Worker     *                                      |   Destination   |
964*61046927SAndroid Build Coastguard Worker     *                                      |                 |
965*61046927SAndroid Build Coastguard Worker     *                                      |-----------------|
966*61046927SAndroid Build Coastguard Worker     *
967*61046927SAndroid Build Coastguard Worker     * Half-registers have to be first so that they stay in the low half of
968*61046927SAndroid Build Coastguard Worker     * the register file. Then half and full killed must stay together so that
969*61046927SAndroid Build Coastguard Worker     * there's a contiguous range where we can put the register. With this
970*61046927SAndroid Build Coastguard Worker     * structure we should be able to accomodate any collection of intervals
971*61046927SAndroid Build Coastguard Worker     * such that the total number of half components is within the half limit
972*61046927SAndroid Build Coastguard Worker     * and the combined components are within the full limit.
973*61046927SAndroid Build Coastguard Worker     */
974*61046927SAndroid Build Coastguard Worker 
975*61046927SAndroid Build Coastguard Worker    unsigned i1_align = reg_elem_size(i1->interval->interval.reg);
976*61046927SAndroid Build Coastguard Worker    unsigned i2_align = reg_elem_size(i2->interval->interval.reg);
977*61046927SAndroid Build Coastguard Worker    if (i1_align > i2_align)
978*61046927SAndroid Build Coastguard Worker       return 1;
979*61046927SAndroid Build Coastguard Worker    if (i1_align < i2_align)
980*61046927SAndroid Build Coastguard Worker       return -1;
981*61046927SAndroid Build Coastguard Worker 
982*61046927SAndroid Build Coastguard Worker    if (i1_align == 1) {
983*61046927SAndroid Build Coastguard Worker       if (i2->interval->is_killed)
984*61046927SAndroid Build Coastguard Worker          return -1;
985*61046927SAndroid Build Coastguard Worker       if (i1->interval->is_killed)
986*61046927SAndroid Build Coastguard Worker          return 1;
987*61046927SAndroid Build Coastguard Worker    } else {
988*61046927SAndroid Build Coastguard Worker       if (i2->interval->is_killed)
989*61046927SAndroid Build Coastguard Worker          return 1;
990*61046927SAndroid Build Coastguard Worker       if (i1->interval->is_killed)
991*61046927SAndroid Build Coastguard Worker          return -1;
992*61046927SAndroid Build Coastguard Worker    }
993*61046927SAndroid Build Coastguard Worker 
994*61046927SAndroid Build Coastguard Worker    return 0;
995*61046927SAndroid Build Coastguard Worker }
996*61046927SAndroid Build Coastguard Worker 
997*61046927SAndroid Build Coastguard Worker static int
dsts_cmp(const void * _i1,const void * _i2)998*61046927SAndroid Build Coastguard Worker dsts_cmp(const void *_i1, const void *_i2)
999*61046927SAndroid Build Coastguard Worker {
1000*61046927SAndroid Build Coastguard Worker    struct ir3_register *i1 = *(struct ir3_register *const *) _i1;
1001*61046927SAndroid Build Coastguard Worker    struct ir3_register *i2 = *(struct ir3_register *const *) _i2;
1002*61046927SAndroid Build Coastguard Worker 
1003*61046927SAndroid Build Coastguard Worker    /* Treat tied destinations as-if they are live-through sources, and normal
1004*61046927SAndroid Build Coastguard Worker     * destinations as killed sources.
1005*61046927SAndroid Build Coastguard Worker     */
1006*61046927SAndroid Build Coastguard Worker    unsigned i1_align = reg_elem_size(i1);
1007*61046927SAndroid Build Coastguard Worker    unsigned i2_align = reg_elem_size(i2);
1008*61046927SAndroid Build Coastguard Worker    if (i1_align > i2_align)
1009*61046927SAndroid Build Coastguard Worker       return 1;
1010*61046927SAndroid Build Coastguard Worker    if (i1_align < i2_align)
1011*61046927SAndroid Build Coastguard Worker       return -1;
1012*61046927SAndroid Build Coastguard Worker 
1013*61046927SAndroid Build Coastguard Worker    if (i1_align == 1) {
1014*61046927SAndroid Build Coastguard Worker       if (!is_early_clobber(i2))
1015*61046927SAndroid Build Coastguard Worker          return -1;
1016*61046927SAndroid Build Coastguard Worker       if (!is_early_clobber(i1))
1017*61046927SAndroid Build Coastguard Worker          return 1;
1018*61046927SAndroid Build Coastguard Worker    } else {
1019*61046927SAndroid Build Coastguard Worker       if (!is_early_clobber(i2))
1020*61046927SAndroid Build Coastguard Worker          return 1;
1021*61046927SAndroid Build Coastguard Worker       if (!is_early_clobber(i1))
1022*61046927SAndroid Build Coastguard Worker          return -1;
1023*61046927SAndroid Build Coastguard Worker    }
1024*61046927SAndroid Build Coastguard Worker 
1025*61046927SAndroid Build Coastguard Worker    return 0;
1026*61046927SAndroid Build Coastguard Worker }
1027*61046927SAndroid Build Coastguard Worker 
1028*61046927SAndroid Build Coastguard Worker /* "Compress" all the live intervals so that there is enough space for the
1029*61046927SAndroid Build Coastguard Worker  * destination register. As there can be gaps when a more-aligned interval
1030*61046927SAndroid Build Coastguard Worker  * follows a less-aligned interval, this also sorts them to remove such
1031*61046927SAndroid Build Coastguard Worker  * "padding", which may be required when space is very tight.  This isn't
1032*61046927SAndroid Build Coastguard Worker  * amazing, but should be used only as a last resort in case the register file
1033*61046927SAndroid Build Coastguard Worker  * is almost full and badly fragmented.
1034*61046927SAndroid Build Coastguard Worker  *
1035*61046927SAndroid Build Coastguard Worker  * Return the physreg to use.
1036*61046927SAndroid Build Coastguard Worker  */
1037*61046927SAndroid Build Coastguard Worker static physreg_t
compress_regs_left(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg)1038*61046927SAndroid Build Coastguard Worker compress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
1039*61046927SAndroid Build Coastguard Worker                    struct ir3_register *reg)
1040*61046927SAndroid Build Coastguard Worker {
1041*61046927SAndroid Build Coastguard Worker    unsigned reg_align = reg_elem_size(reg);
1042*61046927SAndroid Build Coastguard Worker    DECLARE_ARRAY(struct ra_removed_interval, intervals);
1043*61046927SAndroid Build Coastguard Worker    intervals_count = intervals_sz = 0;
1044*61046927SAndroid Build Coastguard Worker    intervals = NULL;
1045*61046927SAndroid Build Coastguard Worker 
1046*61046927SAndroid Build Coastguard Worker    DECLARE_ARRAY(struct ir3_register *, dsts);
1047*61046927SAndroid Build Coastguard Worker    dsts_count = dsts_sz = 0;
1048*61046927SAndroid Build Coastguard Worker    dsts = NULL;
1049*61046927SAndroid Build Coastguard Worker    array_insert(ctx, dsts, reg);
1050*61046927SAndroid Build Coastguard Worker    bool dst_inserted[reg->instr->dsts_count];
1051*61046927SAndroid Build Coastguard Worker 
1052*61046927SAndroid Build Coastguard Worker    unsigned dst_size = reg->tied ? 0 : reg_size(reg);
1053*61046927SAndroid Build Coastguard Worker    unsigned ec_dst_size = is_early_clobber(reg) ? reg_size(reg) : 0;
1054*61046927SAndroid Build Coastguard Worker    unsigned half_dst_size = 0, ec_half_dst_size = 0;
1055*61046927SAndroid Build Coastguard Worker    if (reg_align == 1) {
1056*61046927SAndroid Build Coastguard Worker       half_dst_size = dst_size;
1057*61046927SAndroid Build Coastguard Worker       ec_half_dst_size = ec_dst_size;
1058*61046927SAndroid Build Coastguard Worker    }
1059*61046927SAndroid Build Coastguard Worker 
1060*61046927SAndroid Build Coastguard Worker    unsigned removed_size = 0, removed_half_size = 0;
1061*61046927SAndroid Build Coastguard Worker    unsigned removed_killed_size = 0, removed_killed_half_size = 0;
1062*61046927SAndroid Build Coastguard Worker    unsigned file_size = reg_file_size(file, reg);
1063*61046927SAndroid Build Coastguard Worker    physreg_t start_reg = 0;
1064*61046927SAndroid Build Coastguard Worker 
1065*61046927SAndroid Build Coastguard Worker    foreach_interval_rev_safe (interval, file) {
1066*61046927SAndroid Build Coastguard Worker       /* We'll check if we can compact the intervals starting here. */
1067*61046927SAndroid Build Coastguard Worker       physreg_t candidate_start = interval->physreg_end;
1068*61046927SAndroid Build Coastguard Worker 
1069*61046927SAndroid Build Coastguard Worker       /* Check if there are any other destinations we need to compact. */
1070*61046927SAndroid Build Coastguard Worker       ra_foreach_dst_n (other_dst, n, reg->instr) {
1071*61046927SAndroid Build Coastguard Worker          if (other_dst == reg)
1072*61046927SAndroid Build Coastguard Worker             break;
1073*61046927SAndroid Build Coastguard Worker          if (ra_get_file(ctx, other_dst) != file)
1074*61046927SAndroid Build Coastguard Worker             continue;
1075*61046927SAndroid Build Coastguard Worker          if (dst_inserted[n])
1076*61046927SAndroid Build Coastguard Worker             continue;
1077*61046927SAndroid Build Coastguard Worker 
1078*61046927SAndroid Build Coastguard Worker          struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
1079*61046927SAndroid Build Coastguard Worker          /* if the destination partially overlaps this interval, we need to
1080*61046927SAndroid Build Coastguard Worker           * extend candidate_start to the end.
1081*61046927SAndroid Build Coastguard Worker           */
1082*61046927SAndroid Build Coastguard Worker          if (other_interval->physreg_start < candidate_start) {
1083*61046927SAndroid Build Coastguard Worker             candidate_start = MAX2(candidate_start,
1084*61046927SAndroid Build Coastguard Worker                                    other_interval->physreg_end);
1085*61046927SAndroid Build Coastguard Worker             continue;
1086*61046927SAndroid Build Coastguard Worker          }
1087*61046927SAndroid Build Coastguard Worker 
1088*61046927SAndroid Build Coastguard Worker          dst_inserted[n] = true;
1089*61046927SAndroid Build Coastguard Worker 
1090*61046927SAndroid Build Coastguard Worker          /* dst intervals with a tied killed source are considered attached to
1091*61046927SAndroid Build Coastguard Worker           * that source. Don't actually insert them. This means we have to
1092*61046927SAndroid Build Coastguard Worker           * update them below if their tied source moves.
1093*61046927SAndroid Build Coastguard Worker           */
1094*61046927SAndroid Build Coastguard Worker          if (other_dst->tied) {
1095*61046927SAndroid Build Coastguard Worker             struct ra_interval *tied_interval =
1096*61046927SAndroid Build Coastguard Worker                &ctx->intervals[other_dst->tied->def->name];
1097*61046927SAndroid Build Coastguard Worker             if (tied_interval->is_killed)
1098*61046927SAndroid Build Coastguard Worker                continue;
1099*61046927SAndroid Build Coastguard Worker          }
1100*61046927SAndroid Build Coastguard Worker 
1101*61046927SAndroid Build Coastguard Worker          d("popping destination %u physreg %u\n",
1102*61046927SAndroid Build Coastguard Worker            other_interval->interval.reg->name,
1103*61046927SAndroid Build Coastguard Worker            other_interval->physreg_start);
1104*61046927SAndroid Build Coastguard Worker 
1105*61046927SAndroid Build Coastguard Worker          array_insert(ctx, dsts, other_dst);
1106*61046927SAndroid Build Coastguard Worker          unsigned interval_size = reg_size(other_dst);
1107*61046927SAndroid Build Coastguard Worker          if (is_early_clobber(other_dst)) {
1108*61046927SAndroid Build Coastguard Worker             ec_dst_size += interval_size;
1109*61046927SAndroid Build Coastguard Worker             if (other_interval->interval.reg->flags & IR3_REG_HALF)
1110*61046927SAndroid Build Coastguard Worker                ec_half_dst_size += interval_size;
1111*61046927SAndroid Build Coastguard Worker          } else {
1112*61046927SAndroid Build Coastguard Worker             dst_size += interval_size;
1113*61046927SAndroid Build Coastguard Worker             if (other_interval->interval.reg->flags & IR3_REG_HALF)
1114*61046927SAndroid Build Coastguard Worker                half_dst_size += interval_size;
1115*61046927SAndroid Build Coastguard Worker          }
1116*61046927SAndroid Build Coastguard Worker       }
1117*61046927SAndroid Build Coastguard Worker 
1118*61046927SAndroid Build Coastguard Worker       /* Check if we can sort the intervals *after* this one and have enough
1119*61046927SAndroid Build Coastguard Worker        * space leftover to accomodate all intervals, keeping in mind that killed
1120*61046927SAndroid Build Coastguard Worker        * sources overlap non-tied destinations. Also check that we have enough
1121*61046927SAndroid Build Coastguard Worker        * space leftover for half-registers, if we're inserting a half-register
1122*61046927SAndroid Build Coastguard Worker        * (otherwise we only shift any half-registers down so they should be
1123*61046927SAndroid Build Coastguard Worker        * safe).
1124*61046927SAndroid Build Coastguard Worker        */
1125*61046927SAndroid Build Coastguard Worker       if (candidate_start + removed_size + ec_dst_size +
1126*61046927SAndroid Build Coastguard Worker           MAX2(removed_killed_size, dst_size) <= file->size &&
1127*61046927SAndroid Build Coastguard Worker           (reg_align != 1 ||
1128*61046927SAndroid Build Coastguard Worker            candidate_start + removed_half_size + ec_half_dst_size +
1129*61046927SAndroid Build Coastguard Worker            MAX2(removed_killed_half_size, half_dst_size) <= file_size)) {
1130*61046927SAndroid Build Coastguard Worker          start_reg = candidate_start;
1131*61046927SAndroid Build Coastguard Worker          break;
1132*61046927SAndroid Build Coastguard Worker       }
1133*61046927SAndroid Build Coastguard Worker 
1134*61046927SAndroid Build Coastguard Worker       /* We assume that all frozen intervals are at the start and that we
1135*61046927SAndroid Build Coastguard Worker        * can avoid popping them.
1136*61046927SAndroid Build Coastguard Worker        */
1137*61046927SAndroid Build Coastguard Worker       assert(!interval->frozen);
1138*61046927SAndroid Build Coastguard Worker 
1139*61046927SAndroid Build Coastguard Worker       /* Killed sources are different because they go at the end and can
1140*61046927SAndroid Build Coastguard Worker        * overlap the register we're trying to add.
1141*61046927SAndroid Build Coastguard Worker        */
1142*61046927SAndroid Build Coastguard Worker       unsigned interval_size = interval->physreg_end - interval->physreg_start;
1143*61046927SAndroid Build Coastguard Worker       if (interval->is_killed) {
1144*61046927SAndroid Build Coastguard Worker          removed_killed_size += interval_size;
1145*61046927SAndroid Build Coastguard Worker          if (interval->interval.reg->flags & IR3_REG_HALF)
1146*61046927SAndroid Build Coastguard Worker             removed_killed_half_size += interval_size;
1147*61046927SAndroid Build Coastguard Worker       } else {
1148*61046927SAndroid Build Coastguard Worker          removed_size += interval_size;
1149*61046927SAndroid Build Coastguard Worker          if (interval->interval.reg->flags & IR3_REG_HALF)
1150*61046927SAndroid Build Coastguard Worker             removed_half_size += interval_size;
1151*61046927SAndroid Build Coastguard Worker       }
1152*61046927SAndroid Build Coastguard Worker 
1153*61046927SAndroid Build Coastguard Worker       /* Now that we've done the accounting, pop this off */
1154*61046927SAndroid Build Coastguard Worker       d("popping interval %u physreg %u%s\n", interval->interval.reg->name,
1155*61046927SAndroid Build Coastguard Worker         interval->physreg_start, interval->is_killed ? ", killed" : "");
1156*61046927SAndroid Build Coastguard Worker       array_insert(ctx, intervals, ra_pop_interval(ctx, file, interval));
1157*61046927SAndroid Build Coastguard Worker    }
1158*61046927SAndroid Build Coastguard Worker 
1159*61046927SAndroid Build Coastguard Worker    /* TODO: In addition to skipping registers at the beginning that are
1160*61046927SAndroid Build Coastguard Worker     * well-packed, we should try to skip registers at the end.
1161*61046927SAndroid Build Coastguard Worker     */
1162*61046927SAndroid Build Coastguard Worker 
1163*61046927SAndroid Build Coastguard Worker    qsort(intervals, intervals_count, sizeof(*intervals), removed_interval_cmp);
1164*61046927SAndroid Build Coastguard Worker    qsort(dsts, dsts_count, sizeof(*dsts), dsts_cmp);
1165*61046927SAndroid Build Coastguard Worker 
1166*61046927SAndroid Build Coastguard Worker    physreg_t live_reg = start_reg;
1167*61046927SAndroid Build Coastguard Worker    physreg_t dst_reg = (physreg_t)~0;
1168*61046927SAndroid Build Coastguard Worker    physreg_t ret_reg = (physreg_t)~0;
1169*61046927SAndroid Build Coastguard Worker    unsigned dst_index = 0;
1170*61046927SAndroid Build Coastguard Worker    unsigned live_index = 0;
1171*61046927SAndroid Build Coastguard Worker 
1172*61046927SAndroid Build Coastguard Worker    /* We have two lists of intervals to process, live intervals and destination
1173*61046927SAndroid Build Coastguard Worker     * intervals. Process them in the order of the disgram in insert_cmp().
1174*61046927SAndroid Build Coastguard Worker     */
1175*61046927SAndroid Build Coastguard Worker    while (live_index < intervals_count || dst_index < dsts_count) {
1176*61046927SAndroid Build Coastguard Worker       bool process_dst;
1177*61046927SAndroid Build Coastguard Worker       if (live_index == intervals_count) {
1178*61046927SAndroid Build Coastguard Worker          process_dst = true;
1179*61046927SAndroid Build Coastguard Worker       } else if (dst_index == dsts_count) {
1180*61046927SAndroid Build Coastguard Worker          process_dst = false;
1181*61046927SAndroid Build Coastguard Worker       } else {
1182*61046927SAndroid Build Coastguard Worker          struct ir3_register *dst = dsts[dst_index];
1183*61046927SAndroid Build Coastguard Worker          struct ra_interval *live_interval = intervals[live_index].interval;
1184*61046927SAndroid Build Coastguard Worker 
1185*61046927SAndroid Build Coastguard Worker          bool live_half = live_interval->interval.reg->flags & IR3_REG_HALF;
1186*61046927SAndroid Build Coastguard Worker          bool live_killed = live_interval->is_killed;
1187*61046927SAndroid Build Coastguard Worker          bool dst_half = dst->flags & IR3_REG_HALF;
1188*61046927SAndroid Build Coastguard Worker          bool dst_early_clobber = is_early_clobber(dst);
1189*61046927SAndroid Build Coastguard Worker 
1190*61046927SAndroid Build Coastguard Worker          if (live_half && !live_killed) {
1191*61046927SAndroid Build Coastguard Worker             /* far-left of diagram. */
1192*61046927SAndroid Build Coastguard Worker             process_dst = false;
1193*61046927SAndroid Build Coastguard Worker          } else if (dst_half && dst_early_clobber) {
1194*61046927SAndroid Build Coastguard Worker             /* mid-left of diagram. */
1195*61046927SAndroid Build Coastguard Worker             process_dst = true;
1196*61046927SAndroid Build Coastguard Worker          } else if (!dst_early_clobber) {
1197*61046927SAndroid Build Coastguard Worker             /* bottom of disagram. */
1198*61046927SAndroid Build Coastguard Worker             process_dst = true;
1199*61046927SAndroid Build Coastguard Worker          } else if (live_killed) {
1200*61046927SAndroid Build Coastguard Worker             /* middle of diagram. */
1201*61046927SAndroid Build Coastguard Worker             process_dst = false;
1202*61046927SAndroid Build Coastguard Worker          } else if (!dst_half && dst_early_clobber) {
1203*61046927SAndroid Build Coastguard Worker             /* mid-right of diagram. */
1204*61046927SAndroid Build Coastguard Worker             process_dst = true;
1205*61046927SAndroid Build Coastguard Worker          } else {
1206*61046927SAndroid Build Coastguard Worker             /* far right of diagram. */
1207*61046927SAndroid Build Coastguard Worker             assert(!live_killed && !live_half);
1208*61046927SAndroid Build Coastguard Worker             process_dst = false;
1209*61046927SAndroid Build Coastguard Worker          }
1210*61046927SAndroid Build Coastguard Worker       }
1211*61046927SAndroid Build Coastguard Worker 
1212*61046927SAndroid Build Coastguard Worker       struct ir3_register *cur_reg =
1213*61046927SAndroid Build Coastguard Worker          process_dst ? dsts[dst_index] :
1214*61046927SAndroid Build Coastguard Worker          intervals[live_index].interval->interval.reg;
1215*61046927SAndroid Build Coastguard Worker 
1216*61046927SAndroid Build Coastguard Worker       physreg_t physreg;
1217*61046927SAndroid Build Coastguard Worker       if (process_dst && !is_early_clobber(cur_reg)) {
1218*61046927SAndroid Build Coastguard Worker          if (dst_reg == (physreg_t)~0)
1219*61046927SAndroid Build Coastguard Worker             dst_reg = live_reg;
1220*61046927SAndroid Build Coastguard Worker          physreg = dst_reg;
1221*61046927SAndroid Build Coastguard Worker       } else {
1222*61046927SAndroid Build Coastguard Worker          physreg = live_reg;
1223*61046927SAndroid Build Coastguard Worker          struct ra_interval *live_interval = intervals[live_index].interval;
1224*61046927SAndroid Build Coastguard Worker          bool live_killed = live_interval->is_killed;
1225*61046927SAndroid Build Coastguard Worker          /* If this is live-through and we've processed the destinations, we
1226*61046927SAndroid Build Coastguard Worker           * need to make sure we take into account any overlapping destinations.
1227*61046927SAndroid Build Coastguard Worker           */
1228*61046927SAndroid Build Coastguard Worker          if (!live_killed && dst_reg != (physreg_t)~0)
1229*61046927SAndroid Build Coastguard Worker             physreg = MAX2(physreg, dst_reg);
1230*61046927SAndroid Build Coastguard Worker       }
1231*61046927SAndroid Build Coastguard Worker 
1232*61046927SAndroid Build Coastguard Worker       if (!(cur_reg->flags & IR3_REG_HALF))
1233*61046927SAndroid Build Coastguard Worker          physreg = ALIGN(physreg, 2);
1234*61046927SAndroid Build Coastguard Worker 
1235*61046927SAndroid Build Coastguard Worker       d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
1236*61046927SAndroid Build Coastguard Worker 
1237*61046927SAndroid Build Coastguard Worker       unsigned interval_size = reg_size(cur_reg);
1238*61046927SAndroid Build Coastguard Worker       if (physreg + interval_size >
1239*61046927SAndroid Build Coastguard Worker           reg_file_size(file, cur_reg)) {
1240*61046927SAndroid Build Coastguard Worker          d("ran out of room for interval %u!\n",
1241*61046927SAndroid Build Coastguard Worker            cur_reg->name);
1242*61046927SAndroid Build Coastguard Worker          unreachable("reg pressure calculation was wrong!");
1243*61046927SAndroid Build Coastguard Worker          return 0;
1244*61046927SAndroid Build Coastguard Worker       }
1245*61046927SAndroid Build Coastguard Worker 
1246*61046927SAndroid Build Coastguard Worker       if (process_dst) {
1247*61046927SAndroid Build Coastguard Worker          if (cur_reg == reg) {
1248*61046927SAndroid Build Coastguard Worker             ret_reg = physreg;
1249*61046927SAndroid Build Coastguard Worker          } else {
1250*61046927SAndroid Build Coastguard Worker             struct ra_interval *interval = &ctx->intervals[cur_reg->name];
1251*61046927SAndroid Build Coastguard Worker             interval->physreg_start = physreg;
1252*61046927SAndroid Build Coastguard Worker             interval->physreg_end = physreg + interval_size;
1253*61046927SAndroid Build Coastguard Worker          }
1254*61046927SAndroid Build Coastguard Worker          dst_index++;
1255*61046927SAndroid Build Coastguard Worker       } else {
1256*61046927SAndroid Build Coastguard Worker          ra_push_interval(ctx, file, &intervals[live_index], physreg);
1257*61046927SAndroid Build Coastguard Worker          live_index++;
1258*61046927SAndroid Build Coastguard Worker       }
1259*61046927SAndroid Build Coastguard Worker 
1260*61046927SAndroid Build Coastguard Worker       physreg += interval_size;
1261*61046927SAndroid Build Coastguard Worker 
1262*61046927SAndroid Build Coastguard Worker       if (process_dst && !is_early_clobber(cur_reg)) {
1263*61046927SAndroid Build Coastguard Worker          dst_reg = physreg;
1264*61046927SAndroid Build Coastguard Worker       } else {
1265*61046927SAndroid Build Coastguard Worker          live_reg = physreg;
1266*61046927SAndroid Build Coastguard Worker       }
1267*61046927SAndroid Build Coastguard Worker    }
1268*61046927SAndroid Build Coastguard Worker 
1269*61046927SAndroid Build Coastguard Worker    /* If we shuffled around a tied source that is killed, we may have to update
1270*61046927SAndroid Build Coastguard Worker     * its corresponding destination since we didn't insert it above.
1271*61046927SAndroid Build Coastguard Worker     */
1272*61046927SAndroid Build Coastguard Worker    ra_foreach_dst (dst, reg->instr) {
1273*61046927SAndroid Build Coastguard Worker       if (dst == reg)
1274*61046927SAndroid Build Coastguard Worker          break;
1275*61046927SAndroid Build Coastguard Worker 
1276*61046927SAndroid Build Coastguard Worker       struct ir3_register *tied = dst->tied;
1277*61046927SAndroid Build Coastguard Worker       if (!tied)
1278*61046927SAndroid Build Coastguard Worker          continue;
1279*61046927SAndroid Build Coastguard Worker 
1280*61046927SAndroid Build Coastguard Worker       struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1281*61046927SAndroid Build Coastguard Worker       if (!tied_interval->is_killed)
1282*61046927SAndroid Build Coastguard Worker          continue;
1283*61046927SAndroid Build Coastguard Worker 
1284*61046927SAndroid Build Coastguard Worker       struct ra_interval *dst_interval = &ctx->intervals[dst->name];
1285*61046927SAndroid Build Coastguard Worker       unsigned dst_size = reg_size(dst);
1286*61046927SAndroid Build Coastguard Worker       dst_interval->physreg_start = ra_interval_get_physreg(tied_interval);
1287*61046927SAndroid Build Coastguard Worker       dst_interval->physreg_end = dst_interval->physreg_start + dst_size;
1288*61046927SAndroid Build Coastguard Worker    }
1289*61046927SAndroid Build Coastguard Worker 
1290*61046927SAndroid Build Coastguard Worker    return ret_reg;
1291*61046927SAndroid Build Coastguard Worker }
1292*61046927SAndroid Build Coastguard Worker 
1293*61046927SAndroid Build Coastguard Worker void
ra_update_affinity(unsigned file_size,struct ir3_register * reg,physreg_t physreg)1294*61046927SAndroid Build Coastguard Worker ra_update_affinity(unsigned file_size, struct ir3_register *reg,
1295*61046927SAndroid Build Coastguard Worker                    physreg_t physreg)
1296*61046927SAndroid Build Coastguard Worker {
1297*61046927SAndroid Build Coastguard Worker    if (!reg->merge_set || reg->merge_set->preferred_reg != (physreg_t)~0)
1298*61046927SAndroid Build Coastguard Worker       return;
1299*61046927SAndroid Build Coastguard Worker 
1300*61046927SAndroid Build Coastguard Worker    if (physreg < reg->merge_set_offset)
1301*61046927SAndroid Build Coastguard Worker       return;
1302*61046927SAndroid Build Coastguard Worker 
1303*61046927SAndroid Build Coastguard Worker    if ((physreg - reg->merge_set_offset + reg->merge_set->size) > file_size)
1304*61046927SAndroid Build Coastguard Worker       return;
1305*61046927SAndroid Build Coastguard Worker 
1306*61046927SAndroid Build Coastguard Worker    reg->merge_set->preferred_reg = physreg - reg->merge_set_offset;
1307*61046927SAndroid Build Coastguard Worker }
1308*61046927SAndroid Build Coastguard Worker 
1309*61046927SAndroid Build Coastguard Worker /* Try to find free space for a register without shuffling anything. This uses
1310*61046927SAndroid Build Coastguard Worker  * a round-robin algorithm to reduce false dependencies.
1311*61046927SAndroid Build Coastguard Worker  */
1312*61046927SAndroid Build Coastguard Worker static physreg_t
find_best_gap(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * dst,unsigned file_size,unsigned size,unsigned alignment)1313*61046927SAndroid Build Coastguard Worker find_best_gap(struct ra_ctx *ctx, struct ra_file *file,
1314*61046927SAndroid Build Coastguard Worker               struct ir3_register *dst, unsigned file_size, unsigned size,
1315*61046927SAndroid Build Coastguard Worker               unsigned alignment)
1316*61046927SAndroid Build Coastguard Worker {
1317*61046927SAndroid Build Coastguard Worker    /* This can happen if we create a very large merge set. Just bail out in that
1318*61046927SAndroid Build Coastguard Worker     * case.
1319*61046927SAndroid Build Coastguard Worker     */
1320*61046927SAndroid Build Coastguard Worker    if (size > file_size)
1321*61046927SAndroid Build Coastguard Worker       return (physreg_t) ~0;
1322*61046927SAndroid Build Coastguard Worker 
1323*61046927SAndroid Build Coastguard Worker    BITSET_WORD *available =
1324*61046927SAndroid Build Coastguard Worker       is_early_clobber(dst) ? file->available_to_evict : file->available;
1325*61046927SAndroid Build Coastguard Worker 
1326*61046927SAndroid Build Coastguard Worker    unsigned start = ALIGN(file->start, alignment) % (file_size - size + alignment);
1327*61046927SAndroid Build Coastguard Worker    unsigned candidate = start;
1328*61046927SAndroid Build Coastguard Worker    do {
1329*61046927SAndroid Build Coastguard Worker       bool is_available = true;
1330*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < size; i++) {
1331*61046927SAndroid Build Coastguard Worker          if (!BITSET_TEST(available, candidate + i)) {
1332*61046927SAndroid Build Coastguard Worker             is_available = false;
1333*61046927SAndroid Build Coastguard Worker             break;
1334*61046927SAndroid Build Coastguard Worker          }
1335*61046927SAndroid Build Coastguard Worker       }
1336*61046927SAndroid Build Coastguard Worker 
1337*61046927SAndroid Build Coastguard Worker       if (is_available) {
1338*61046927SAndroid Build Coastguard Worker          is_available =
1339*61046927SAndroid Build Coastguard Worker             !check_dst_overlap(ctx, file, dst, candidate, candidate + size);
1340*61046927SAndroid Build Coastguard Worker       }
1341*61046927SAndroid Build Coastguard Worker 
1342*61046927SAndroid Build Coastguard Worker       if (is_available) {
1343*61046927SAndroid Build Coastguard Worker          file->start = (candidate + size) % file_size;
1344*61046927SAndroid Build Coastguard Worker          return candidate;
1345*61046927SAndroid Build Coastguard Worker       }
1346*61046927SAndroid Build Coastguard Worker 
1347*61046927SAndroid Build Coastguard Worker       candidate += alignment;
1348*61046927SAndroid Build Coastguard Worker       if (candidate + size > file_size)
1349*61046927SAndroid Build Coastguard Worker          candidate = 0;
1350*61046927SAndroid Build Coastguard Worker    } while (candidate != start);
1351*61046927SAndroid Build Coastguard Worker 
1352*61046927SAndroid Build Coastguard Worker    return (physreg_t)~0;
1353*61046927SAndroid Build Coastguard Worker }
1354*61046927SAndroid Build Coastguard Worker 
1355*61046927SAndroid Build Coastguard Worker static physreg_t
try_allocate_src(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg)1356*61046927SAndroid Build Coastguard Worker try_allocate_src(struct ra_ctx *ctx, struct ra_file *file,
1357*61046927SAndroid Build Coastguard Worker                  struct ir3_register *reg)
1358*61046927SAndroid Build Coastguard Worker {
1359*61046927SAndroid Build Coastguard Worker    unsigned file_size = reg_file_size(file, reg);
1360*61046927SAndroid Build Coastguard Worker    unsigned size = reg_size(reg);
1361*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < reg->instr->srcs_count; i++) {
1362*61046927SAndroid Build Coastguard Worker       struct ir3_register *src = reg->instr->srcs[i];
1363*61046927SAndroid Build Coastguard Worker       if (!ra_reg_is_src(src))
1364*61046927SAndroid Build Coastguard Worker          continue;
1365*61046927SAndroid Build Coastguard Worker       if (ra_get_file(ctx, src) == file && reg_size(src) >= size) {
1366*61046927SAndroid Build Coastguard Worker          struct ra_interval *src_interval = &ctx->intervals[src->def->name];
1367*61046927SAndroid Build Coastguard Worker          physreg_t src_physreg = ra_interval_get_physreg(src_interval);
1368*61046927SAndroid Build Coastguard Worker          if (src_physreg % reg_elem_size(reg) == 0 &&
1369*61046927SAndroid Build Coastguard Worker              src_physreg + size <= file_size &&
1370*61046927SAndroid Build Coastguard Worker              get_reg_specified(ctx, file, reg, src_physreg, false))
1371*61046927SAndroid Build Coastguard Worker             return src_physreg;
1372*61046927SAndroid Build Coastguard Worker       }
1373*61046927SAndroid Build Coastguard Worker    }
1374*61046927SAndroid Build Coastguard Worker 
1375*61046927SAndroid Build Coastguard Worker    return ~0;
1376*61046927SAndroid Build Coastguard Worker }
1377*61046927SAndroid Build Coastguard Worker 
1378*61046927SAndroid Build Coastguard Worker static bool
rpt_has_unique_merge_set(struct ir3_instruction * instr)1379*61046927SAndroid Build Coastguard Worker rpt_has_unique_merge_set(struct ir3_instruction *instr)
1380*61046927SAndroid Build Coastguard Worker {
1381*61046927SAndroid Build Coastguard Worker    assert(ir3_instr_is_rpt(instr));
1382*61046927SAndroid Build Coastguard Worker 
1383*61046927SAndroid Build Coastguard Worker    if (!instr->dsts[0]->merge_set)
1384*61046927SAndroid Build Coastguard Worker       return false;
1385*61046927SAndroid Build Coastguard Worker 
1386*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *first = ir3_instr_first_rpt(instr);
1387*61046927SAndroid Build Coastguard Worker    struct ir3_register *def = first->dsts[0];
1388*61046927SAndroid Build Coastguard Worker 
1389*61046927SAndroid Build Coastguard Worker    if (def->merge_set != instr->dsts[0]->merge_set ||
1390*61046927SAndroid Build Coastguard Worker        def->merge_set->regs_count != ir3_instr_rpt_length(first)) {
1391*61046927SAndroid Build Coastguard Worker       return false;
1392*61046927SAndroid Build Coastguard Worker    }
1393*61046927SAndroid Build Coastguard Worker 
1394*61046927SAndroid Build Coastguard Worker    unsigned i = 0;
1395*61046927SAndroid Build Coastguard Worker 
1396*61046927SAndroid Build Coastguard Worker    foreach_instr_rpt (rpt, first) {
1397*61046927SAndroid Build Coastguard Worker       if (rpt->dsts[0] != def->merge_set->regs[i++])
1398*61046927SAndroid Build Coastguard Worker          return false;
1399*61046927SAndroid Build Coastguard Worker    }
1400*61046927SAndroid Build Coastguard Worker 
1401*61046927SAndroid Build Coastguard Worker    return true;
1402*61046927SAndroid Build Coastguard Worker }
1403*61046927SAndroid Build Coastguard Worker 
1404*61046927SAndroid Build Coastguard Worker /* This is the main entrypoint for picking a register. Pick a free register
1405*61046927SAndroid Build Coastguard Worker  * for "reg", shuffling around sources if necessary. In the normal case where
1406*61046927SAndroid Build Coastguard Worker  * "is_source" is false, this register can overlap with killed sources
1407*61046927SAndroid Build Coastguard Worker  * (intervals with "is_killed == true"). If "is_source" is true, then
1408*61046927SAndroid Build Coastguard Worker  * is_killed is ignored and the register returned must not overlap with killed
1409*61046927SAndroid Build Coastguard Worker  * sources. This must be used for tied registers, because we're actually
1410*61046927SAndroid Build Coastguard Worker  * allocating the destination and the tied source at the same time.
1411*61046927SAndroid Build Coastguard Worker  */
1412*61046927SAndroid Build Coastguard Worker 
1413*61046927SAndroid Build Coastguard Worker static physreg_t
get_reg(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg)1414*61046927SAndroid Build Coastguard Worker get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg)
1415*61046927SAndroid Build Coastguard Worker {
1416*61046927SAndroid Build Coastguard Worker    unsigned file_size = reg_file_size(file, reg);
1417*61046927SAndroid Build Coastguard Worker    if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) {
1418*61046927SAndroid Build Coastguard Worker       physreg_t preferred_reg =
1419*61046927SAndroid Build Coastguard Worker          reg->merge_set->preferred_reg + reg->merge_set_offset;
1420*61046927SAndroid Build Coastguard Worker       if (preferred_reg + reg_size(reg) <= file_size &&
1421*61046927SAndroid Build Coastguard Worker           preferred_reg % reg_elem_size(reg) == 0 &&
1422*61046927SAndroid Build Coastguard Worker           get_reg_specified(ctx, file, reg, preferred_reg, false))
1423*61046927SAndroid Build Coastguard Worker          return preferred_reg;
1424*61046927SAndroid Build Coastguard Worker    }
1425*61046927SAndroid Build Coastguard Worker 
1426*61046927SAndroid Build Coastguard Worker    /* For repeated instructions whose merge set is unique (i.e., only used for
1427*61046927SAndroid Build Coastguard Worker     * these repeated instructions), try to first allocate one of their sources
1428*61046927SAndroid Build Coastguard Worker     * (for the same reason as for ALU/SFU instructions explained below). This
1429*61046927SAndroid Build Coastguard Worker     * also prevents us from allocating a new register range for this merge set
1430*61046927SAndroid Build Coastguard Worker     * when the one from a source could be reused.
1431*61046927SAndroid Build Coastguard Worker     */
1432*61046927SAndroid Build Coastguard Worker    if (ir3_instr_is_rpt(reg->instr) && rpt_has_unique_merge_set(reg->instr)) {
1433*61046927SAndroid Build Coastguard Worker       physreg_t src_reg = try_allocate_src(ctx, file, reg);
1434*61046927SAndroid Build Coastguard Worker       if (src_reg != (physreg_t)~0)
1435*61046927SAndroid Build Coastguard Worker          return src_reg;
1436*61046927SAndroid Build Coastguard Worker    }
1437*61046927SAndroid Build Coastguard Worker 
1438*61046927SAndroid Build Coastguard Worker    /* If this register is a subset of a merge set which we have not picked a
1439*61046927SAndroid Build Coastguard Worker     * register for, first try to allocate enough space for the entire merge
1440*61046927SAndroid Build Coastguard Worker     * set.
1441*61046927SAndroid Build Coastguard Worker     */
1442*61046927SAndroid Build Coastguard Worker    unsigned size = reg_size(reg);
1443*61046927SAndroid Build Coastguard Worker    if (reg->merge_set && reg->merge_set->preferred_reg == (physreg_t)~0 &&
1444*61046927SAndroid Build Coastguard Worker        size < reg->merge_set->size) {
1445*61046927SAndroid Build Coastguard Worker       physreg_t best_reg = find_best_gap(ctx, file, reg, file_size,
1446*61046927SAndroid Build Coastguard Worker                                          reg->merge_set->size,
1447*61046927SAndroid Build Coastguard Worker                                          reg->merge_set->alignment);
1448*61046927SAndroid Build Coastguard Worker       if (best_reg != (physreg_t)~0u) {
1449*61046927SAndroid Build Coastguard Worker          best_reg += reg->merge_set_offset;
1450*61046927SAndroid Build Coastguard Worker          return best_reg;
1451*61046927SAndroid Build Coastguard Worker       }
1452*61046927SAndroid Build Coastguard Worker    }
1453*61046927SAndroid Build Coastguard Worker 
1454*61046927SAndroid Build Coastguard Worker    /* For ALU and SFU instructions, if the src reg is avail to pick, use it.
1455*61046927SAndroid Build Coastguard Worker     * Because this doesn't introduce unnecessary dependencies, and it
1456*61046927SAndroid Build Coastguard Worker     * potentially avoids needing (ss) syncs for write after read hazards for
1457*61046927SAndroid Build Coastguard Worker     * SFU instructions:
1458*61046927SAndroid Build Coastguard Worker     */
1459*61046927SAndroid Build Coastguard Worker    if (is_sfu(reg->instr) || is_alu(reg->instr)) {
1460*61046927SAndroid Build Coastguard Worker       physreg_t src_reg = try_allocate_src(ctx, file, reg);
1461*61046927SAndroid Build Coastguard Worker       if (src_reg != (physreg_t)~0)
1462*61046927SAndroid Build Coastguard Worker          return src_reg;
1463*61046927SAndroid Build Coastguard Worker    }
1464*61046927SAndroid Build Coastguard Worker 
1465*61046927SAndroid Build Coastguard Worker    physreg_t best_reg =
1466*61046927SAndroid Build Coastguard Worker       find_best_gap(ctx, file, reg, file_size, size, reg_elem_size(reg));
1467*61046927SAndroid Build Coastguard Worker    if (best_reg != (physreg_t)~0u) {
1468*61046927SAndroid Build Coastguard Worker       return best_reg;
1469*61046927SAndroid Build Coastguard Worker    }
1470*61046927SAndroid Build Coastguard Worker 
1471*61046927SAndroid Build Coastguard Worker    /* Ok, we couldn't find anything that fits. Here is where we have to start
1472*61046927SAndroid Build Coastguard Worker     * moving things around to make stuff fit. First try solely evicting
1473*61046927SAndroid Build Coastguard Worker     * registers in the way.
1474*61046927SAndroid Build Coastguard Worker     */
1475*61046927SAndroid Build Coastguard Worker    unsigned best_eviction_count = ~0;
1476*61046927SAndroid Build Coastguard Worker    for (physreg_t i = 0; i + size <= file_size; i += reg_elem_size(reg)) {
1477*61046927SAndroid Build Coastguard Worker       unsigned eviction_count;
1478*61046927SAndroid Build Coastguard Worker       if (try_evict_regs(ctx, file, reg, i, &eviction_count, false, true)) {
1479*61046927SAndroid Build Coastguard Worker          if (eviction_count < best_eviction_count) {
1480*61046927SAndroid Build Coastguard Worker             best_eviction_count = eviction_count;
1481*61046927SAndroid Build Coastguard Worker             best_reg = i;
1482*61046927SAndroid Build Coastguard Worker          }
1483*61046927SAndroid Build Coastguard Worker       }
1484*61046927SAndroid Build Coastguard Worker    }
1485*61046927SAndroid Build Coastguard Worker 
1486*61046927SAndroid Build Coastguard Worker    if (best_eviction_count != ~0) {
1487*61046927SAndroid Build Coastguard Worker       ASSERTED bool result = try_evict_regs(
1488*61046927SAndroid Build Coastguard Worker          ctx, file, reg, best_reg, &best_eviction_count, false, false);
1489*61046927SAndroid Build Coastguard Worker       assert(result);
1490*61046927SAndroid Build Coastguard Worker       return best_reg;
1491*61046927SAndroid Build Coastguard Worker    }
1492*61046927SAndroid Build Coastguard Worker 
1493*61046927SAndroid Build Coastguard Worker    /* Use the dumb fallback only if try_evict_regs() fails. */
1494*61046927SAndroid Build Coastguard Worker    return compress_regs_left(ctx, file, reg);
1495*61046927SAndroid Build Coastguard Worker }
1496*61046927SAndroid Build Coastguard Worker 
1497*61046927SAndroid Build Coastguard Worker static void
assign_reg(struct ir3_instruction * instr,struct ir3_register * reg,unsigned num)1498*61046927SAndroid Build Coastguard Worker assign_reg(struct ir3_instruction *instr, struct ir3_register *reg,
1499*61046927SAndroid Build Coastguard Worker            unsigned num)
1500*61046927SAndroid Build Coastguard Worker {
1501*61046927SAndroid Build Coastguard Worker    if (reg->flags & IR3_REG_ARRAY) {
1502*61046927SAndroid Build Coastguard Worker       reg->array.base = num;
1503*61046927SAndroid Build Coastguard Worker       if (reg->flags & IR3_REG_RELATIV)
1504*61046927SAndroid Build Coastguard Worker          reg->array.offset += num;
1505*61046927SAndroid Build Coastguard Worker       else
1506*61046927SAndroid Build Coastguard Worker          reg->num = num + reg->array.offset;
1507*61046927SAndroid Build Coastguard Worker    } else {
1508*61046927SAndroid Build Coastguard Worker       reg->num = num;
1509*61046927SAndroid Build Coastguard Worker    }
1510*61046927SAndroid Build Coastguard Worker }
1511*61046927SAndroid Build Coastguard Worker 
1512*61046927SAndroid Build Coastguard Worker static void
mark_src_killed(struct ra_ctx * ctx,struct ir3_register * src)1513*61046927SAndroid Build Coastguard Worker mark_src_killed(struct ra_ctx *ctx, struct ir3_register *src)
1514*61046927SAndroid Build Coastguard Worker {
1515*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[src->def->name];
1516*61046927SAndroid Build Coastguard Worker 
1517*61046927SAndroid Build Coastguard Worker    if (!(src->flags & IR3_REG_FIRST_KILL) || interval->is_killed ||
1518*61046927SAndroid Build Coastguard Worker        interval->interval.parent ||
1519*61046927SAndroid Build Coastguard Worker        !rb_tree_is_empty(&interval->interval.children))
1520*61046927SAndroid Build Coastguard Worker       return;
1521*61046927SAndroid Build Coastguard Worker 
1522*61046927SAndroid Build Coastguard Worker    ra_file_mark_killed(ra_get_file(ctx, src), interval);
1523*61046927SAndroid Build Coastguard Worker }
1524*61046927SAndroid Build Coastguard Worker 
1525*61046927SAndroid Build Coastguard Worker static void
insert_dst(struct ra_ctx * ctx,struct ir3_register * dst)1526*61046927SAndroid Build Coastguard Worker insert_dst(struct ra_ctx *ctx, struct ir3_register *dst)
1527*61046927SAndroid Build Coastguard Worker {
1528*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, dst);
1529*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[dst->name];
1530*61046927SAndroid Build Coastguard Worker 
1531*61046927SAndroid Build Coastguard Worker    d("insert dst %u physreg %u", dst->name, ra_interval_get_physreg(interval));
1532*61046927SAndroid Build Coastguard Worker 
1533*61046927SAndroid Build Coastguard Worker    if (!(dst->flags & IR3_REG_UNUSED))
1534*61046927SAndroid Build Coastguard Worker       ra_file_insert(file, interval);
1535*61046927SAndroid Build Coastguard Worker 
1536*61046927SAndroid Build Coastguard Worker    assign_reg(dst->instr, dst, ra_interval_get_num(interval));
1537*61046927SAndroid Build Coastguard Worker }
1538*61046927SAndroid Build Coastguard Worker 
1539*61046927SAndroid Build Coastguard Worker static void
allocate_dst_fixed(struct ra_ctx * ctx,struct ir3_register * dst,physreg_t physreg)1540*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(struct ra_ctx *ctx, struct ir3_register *dst,
1541*61046927SAndroid Build Coastguard Worker                    physreg_t physreg)
1542*61046927SAndroid Build Coastguard Worker {
1543*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, dst);
1544*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[dst->name];
1545*61046927SAndroid Build Coastguard Worker    ra_update_affinity(file->size, dst, physreg);
1546*61046927SAndroid Build Coastguard Worker 
1547*61046927SAndroid Build Coastguard Worker    ra_interval_init(interval, dst);
1548*61046927SAndroid Build Coastguard Worker    interval->physreg_start = physreg;
1549*61046927SAndroid Build Coastguard Worker    interval->physreg_end = physreg + reg_size(dst);
1550*61046927SAndroid Build Coastguard Worker }
1551*61046927SAndroid Build Coastguard Worker 
1552*61046927SAndroid Build Coastguard Worker /* If a tied destination interferes with its source register, we have to insert
1553*61046927SAndroid Build Coastguard Worker  * a copy beforehand to copy the source to the destination. Because we are using
1554*61046927SAndroid Build Coastguard Worker  * the parallel_copies array and not creating a separate copy, this copy will
1555*61046927SAndroid Build Coastguard Worker  * happen in parallel with any shuffling around of the tied source, so we have
1556*61046927SAndroid Build Coastguard Worker  * to copy the source *as it exists before it is shuffled around*. We do this by
1557*61046927SAndroid Build Coastguard Worker  * inserting the copy early, before any other copies are inserted. We don't
1558*61046927SAndroid Build Coastguard Worker  * actually know the destination of the copy, but that's ok because the
1559*61046927SAndroid Build Coastguard Worker  * dst_interval will be filled out later.
1560*61046927SAndroid Build Coastguard Worker  */
1561*61046927SAndroid Build Coastguard Worker static void
insert_tied_dst_copy(struct ra_ctx * ctx,struct ir3_register * dst)1562*61046927SAndroid Build Coastguard Worker insert_tied_dst_copy(struct ra_ctx *ctx, struct ir3_register *dst)
1563*61046927SAndroid Build Coastguard Worker {
1564*61046927SAndroid Build Coastguard Worker    struct ir3_register *tied = dst->tied;
1565*61046927SAndroid Build Coastguard Worker 
1566*61046927SAndroid Build Coastguard Worker    if (!tied)
1567*61046927SAndroid Build Coastguard Worker       return;
1568*61046927SAndroid Build Coastguard Worker 
1569*61046927SAndroid Build Coastguard Worker    struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1570*61046927SAndroid Build Coastguard Worker    struct ra_interval *dst_interval = &ctx->intervals[dst->name];
1571*61046927SAndroid Build Coastguard Worker 
1572*61046927SAndroid Build Coastguard Worker    if (tied_interval->is_killed)
1573*61046927SAndroid Build Coastguard Worker       return;
1574*61046927SAndroid Build Coastguard Worker 
1575*61046927SAndroid Build Coastguard Worker    physreg_t tied_physreg = ra_interval_get_physreg(tied_interval);
1576*61046927SAndroid Build Coastguard Worker 
1577*61046927SAndroid Build Coastguard Worker    array_insert(ctx, ctx->parallel_copies,
1578*61046927SAndroid Build Coastguard Worker                 (struct ra_parallel_copy){
1579*61046927SAndroid Build Coastguard Worker                    .interval = dst_interval,
1580*61046927SAndroid Build Coastguard Worker                    .src = tied_physreg,
1581*61046927SAndroid Build Coastguard Worker                 });
1582*61046927SAndroid Build Coastguard Worker }
1583*61046927SAndroid Build Coastguard Worker 
1584*61046927SAndroid Build Coastguard Worker static void
allocate_dst(struct ra_ctx * ctx,struct ir3_register * dst)1585*61046927SAndroid Build Coastguard Worker allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst)
1586*61046927SAndroid Build Coastguard Worker {
1587*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, dst);
1588*61046927SAndroid Build Coastguard Worker 
1589*61046927SAndroid Build Coastguard Worker    struct ir3_register *tied = dst->tied;
1590*61046927SAndroid Build Coastguard Worker    if (tied) {
1591*61046927SAndroid Build Coastguard Worker       struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1592*61046927SAndroid Build Coastguard Worker       if (tied_interval->is_killed) {
1593*61046927SAndroid Build Coastguard Worker          /* The easy case: the source is killed, so we can just reuse it
1594*61046927SAndroid Build Coastguard Worker           * for the destination.
1595*61046927SAndroid Build Coastguard Worker           */
1596*61046927SAndroid Build Coastguard Worker          allocate_dst_fixed(ctx, dst, ra_interval_get_physreg(tied_interval));
1597*61046927SAndroid Build Coastguard Worker          return;
1598*61046927SAndroid Build Coastguard Worker       }
1599*61046927SAndroid Build Coastguard Worker    }
1600*61046927SAndroid Build Coastguard Worker 
1601*61046927SAndroid Build Coastguard Worker    /* All the hard work is done by get_reg here. */
1602*61046927SAndroid Build Coastguard Worker    physreg_t physreg = get_reg(ctx, file, dst);
1603*61046927SAndroid Build Coastguard Worker 
1604*61046927SAndroid Build Coastguard Worker    allocate_dst_fixed(ctx, dst, physreg);
1605*61046927SAndroid Build Coastguard Worker }
1606*61046927SAndroid Build Coastguard Worker 
1607*61046927SAndroid Build Coastguard Worker static void
assign_src(struct ra_ctx * ctx,struct ir3_instruction * instr,struct ir3_register * src)1608*61046927SAndroid Build Coastguard Worker assign_src(struct ra_ctx *ctx, struct ir3_instruction *instr,
1609*61046927SAndroid Build Coastguard Worker            struct ir3_register *src)
1610*61046927SAndroid Build Coastguard Worker {
1611*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[src->def->name];
1612*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, src);
1613*61046927SAndroid Build Coastguard Worker 
1614*61046927SAndroid Build Coastguard Worker    struct ir3_register *tied = src->tied;
1615*61046927SAndroid Build Coastguard Worker    physreg_t physreg;
1616*61046927SAndroid Build Coastguard Worker    if (tied) {
1617*61046927SAndroid Build Coastguard Worker       struct ra_interval *tied_interval = &ctx->intervals[tied->name];
1618*61046927SAndroid Build Coastguard Worker       physreg = ra_interval_get_physreg(tied_interval);
1619*61046927SAndroid Build Coastguard Worker    } else {
1620*61046927SAndroid Build Coastguard Worker       physreg = ra_interval_get_physreg(interval);
1621*61046927SAndroid Build Coastguard Worker    }
1622*61046927SAndroid Build Coastguard Worker 
1623*61046927SAndroid Build Coastguard Worker    assign_reg(instr, src, ra_physreg_to_num(physreg, src->flags));
1624*61046927SAndroid Build Coastguard Worker 
1625*61046927SAndroid Build Coastguard Worker    if (src->flags & IR3_REG_FIRST_KILL)
1626*61046927SAndroid Build Coastguard Worker       ra_file_remove(file, interval);
1627*61046927SAndroid Build Coastguard Worker }
1628*61046927SAndroid Build Coastguard Worker 
1629*61046927SAndroid Build Coastguard Worker /* Insert a parallel copy instruction before the instruction with the parallel
1630*61046927SAndroid Build Coastguard Worker  * copy entries we've built up.
1631*61046927SAndroid Build Coastguard Worker  */
1632*61046927SAndroid Build Coastguard Worker static void
insert_parallel_copy_instr(struct ra_ctx * ctx,struct ir3_instruction * instr)1633*61046927SAndroid Build Coastguard Worker insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
1634*61046927SAndroid Build Coastguard Worker {
1635*61046927SAndroid Build Coastguard Worker    if (ctx->parallel_copies_count == 0)
1636*61046927SAndroid Build Coastguard Worker       return;
1637*61046927SAndroid Build Coastguard Worker 
1638*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *pcopy =
1639*61046927SAndroid Build Coastguard Worker       ir3_instr_create(instr->block, OPC_META_PARALLEL_COPY,
1640*61046927SAndroid Build Coastguard Worker                        ctx->parallel_copies_count, ctx->parallel_copies_count);
1641*61046927SAndroid Build Coastguard Worker 
1642*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
1643*61046927SAndroid Build Coastguard Worker       struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
1644*61046927SAndroid Build Coastguard Worker       struct ir3_register *reg =
1645*61046927SAndroid Build Coastguard Worker          ir3_dst_create(pcopy, INVALID_REG,
1646*61046927SAndroid Build Coastguard Worker                         entry->interval->interval.reg->flags &
1647*61046927SAndroid Build Coastguard Worker                         (IR3_REG_HALF | IR3_REG_ARRAY | IR3_REG_SHARED));
1648*61046927SAndroid Build Coastguard Worker       reg->size = entry->interval->interval.reg->size;
1649*61046927SAndroid Build Coastguard Worker       reg->wrmask = entry->interval->interval.reg->wrmask;
1650*61046927SAndroid Build Coastguard Worker       assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
1651*61046927SAndroid Build Coastguard Worker    }
1652*61046927SAndroid Build Coastguard Worker 
1653*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
1654*61046927SAndroid Build Coastguard Worker       struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
1655*61046927SAndroid Build Coastguard Worker       struct ir3_register *reg =
1656*61046927SAndroid Build Coastguard Worker          ir3_src_create(pcopy, INVALID_REG,
1657*61046927SAndroid Build Coastguard Worker                         entry->interval->interval.reg->flags &
1658*61046927SAndroid Build Coastguard Worker                         (IR3_REG_HALF | IR3_REG_ARRAY | IR3_REG_SHARED));
1659*61046927SAndroid Build Coastguard Worker       reg->size = entry->interval->interval.reg->size;
1660*61046927SAndroid Build Coastguard Worker       reg->wrmask = entry->interval->interval.reg->wrmask;
1661*61046927SAndroid Build Coastguard Worker       assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
1662*61046927SAndroid Build Coastguard Worker    }
1663*61046927SAndroid Build Coastguard Worker 
1664*61046927SAndroid Build Coastguard Worker    list_del(&pcopy->node);
1665*61046927SAndroid Build Coastguard Worker    list_addtail(&pcopy->node, &instr->node);
1666*61046927SAndroid Build Coastguard Worker    ctx->parallel_copies_count = 0;
1667*61046927SAndroid Build Coastguard Worker }
1668*61046927SAndroid Build Coastguard Worker 
1669*61046927SAndroid Build Coastguard Worker static void
handle_normal_instr(struct ra_ctx * ctx,struct ir3_instruction * instr)1670*61046927SAndroid Build Coastguard Worker handle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
1671*61046927SAndroid Build Coastguard Worker {
1672*61046927SAndroid Build Coastguard Worker    /* First, mark sources as going-to-be-killed while allocating the dest. */
1673*61046927SAndroid Build Coastguard Worker    ra_foreach_src (src, instr) {
1674*61046927SAndroid Build Coastguard Worker       mark_src_killed(ctx, src);
1675*61046927SAndroid Build Coastguard Worker    }
1676*61046927SAndroid Build Coastguard Worker 
1677*61046927SAndroid Build Coastguard Worker    /* Pre-insert tied dst copies. */
1678*61046927SAndroid Build Coastguard Worker    ra_foreach_dst (dst, instr) {
1679*61046927SAndroid Build Coastguard Worker       insert_tied_dst_copy(ctx, dst);
1680*61046927SAndroid Build Coastguard Worker    }
1681*61046927SAndroid Build Coastguard Worker 
1682*61046927SAndroid Build Coastguard Worker    /* Allocate the destination. */
1683*61046927SAndroid Build Coastguard Worker    ra_foreach_dst (dst, instr) {
1684*61046927SAndroid Build Coastguard Worker       allocate_dst(ctx, dst);
1685*61046927SAndroid Build Coastguard Worker    }
1686*61046927SAndroid Build Coastguard Worker 
1687*61046927SAndroid Build Coastguard Worker    /* Now handle sources. Go backward so that in case there are multiple
1688*61046927SAndroid Build Coastguard Worker     * sources with the same def and that def is killed we only remove it at
1689*61046927SAndroid Build Coastguard Worker     * the end.
1690*61046927SAndroid Build Coastguard Worker     */
1691*61046927SAndroid Build Coastguard Worker    ra_foreach_src_rev (src, instr) {
1692*61046927SAndroid Build Coastguard Worker       assign_src(ctx, instr, src);
1693*61046927SAndroid Build Coastguard Worker    }
1694*61046927SAndroid Build Coastguard Worker 
1695*61046927SAndroid Build Coastguard Worker    /* Now finally insert the destination into the map. */
1696*61046927SAndroid Build Coastguard Worker    ra_foreach_dst (dst, instr) {
1697*61046927SAndroid Build Coastguard Worker       insert_dst(ctx, dst);
1698*61046927SAndroid Build Coastguard Worker    }
1699*61046927SAndroid Build Coastguard Worker 
1700*61046927SAndroid Build Coastguard Worker    insert_parallel_copy_instr(ctx, instr);
1701*61046927SAndroid Build Coastguard Worker }
1702*61046927SAndroid Build Coastguard Worker 
1703*61046927SAndroid Build Coastguard Worker static void
handle_split(struct ra_ctx * ctx,struct ir3_instruction * instr)1704*61046927SAndroid Build Coastguard Worker handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
1705*61046927SAndroid Build Coastguard Worker {
1706*61046927SAndroid Build Coastguard Worker    struct ir3_register *dst = instr->dsts[0];
1707*61046927SAndroid Build Coastguard Worker    struct ir3_register *src = instr->srcs[0];
1708*61046927SAndroid Build Coastguard Worker 
1709*61046927SAndroid Build Coastguard Worker    if (!(dst->flags & IR3_REG_SSA))
1710*61046927SAndroid Build Coastguard Worker       return;
1711*61046927SAndroid Build Coastguard Worker 
1712*61046927SAndroid Build Coastguard Worker    if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) {
1713*61046927SAndroid Build Coastguard Worker       handle_normal_instr(ctx, instr);
1714*61046927SAndroid Build Coastguard Worker       return;
1715*61046927SAndroid Build Coastguard Worker    }
1716*61046927SAndroid Build Coastguard Worker 
1717*61046927SAndroid Build Coastguard Worker    struct ra_interval *src_interval = &ctx->intervals[src->def->name];
1718*61046927SAndroid Build Coastguard Worker 
1719*61046927SAndroid Build Coastguard Worker    physreg_t physreg = ra_interval_get_physreg(src_interval);
1720*61046927SAndroid Build Coastguard Worker    assign_src(ctx, instr, src);
1721*61046927SAndroid Build Coastguard Worker 
1722*61046927SAndroid Build Coastguard Worker    allocate_dst_fixed(
1723*61046927SAndroid Build Coastguard Worker       ctx, dst, physreg - src->def->merge_set_offset + dst->merge_set_offset);
1724*61046927SAndroid Build Coastguard Worker    insert_dst(ctx, dst);
1725*61046927SAndroid Build Coastguard Worker }
1726*61046927SAndroid Build Coastguard Worker 
1727*61046927SAndroid Build Coastguard Worker static void
handle_collect(struct ra_ctx * ctx,struct ir3_instruction * instr)1728*61046927SAndroid Build Coastguard Worker handle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr)
1729*61046927SAndroid Build Coastguard Worker {
1730*61046927SAndroid Build Coastguard Worker    if (!(instr->dsts[0]->flags & IR3_REG_SSA))
1731*61046927SAndroid Build Coastguard Worker       return;
1732*61046927SAndroid Build Coastguard Worker 
1733*61046927SAndroid Build Coastguard Worker    struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set;
1734*61046927SAndroid Build Coastguard Worker    unsigned dst_offset = instr->dsts[0]->merge_set_offset;
1735*61046927SAndroid Build Coastguard Worker 
1736*61046927SAndroid Build Coastguard Worker    if (!dst_set || dst_set->regs_count == 1) {
1737*61046927SAndroid Build Coastguard Worker       handle_normal_instr(ctx, instr);
1738*61046927SAndroid Build Coastguard Worker       return;
1739*61046927SAndroid Build Coastguard Worker    }
1740*61046927SAndroid Build Coastguard Worker 
1741*61046927SAndroid Build Coastguard Worker    /* We need to check if any of the sources are contained in an interval
1742*61046927SAndroid Build Coastguard Worker     * that is at least as large as the vector. In this case, we should put
1743*61046927SAndroid Build Coastguard Worker     * the vector inside that larger interval. (There should be one
1744*61046927SAndroid Build Coastguard Worker     * unambiguous place to put it, because values sharing the same merge set
1745*61046927SAndroid Build Coastguard Worker     * should be allocated together.) This can happen in a case like:
1746*61046927SAndroid Build Coastguard Worker     *
1747*61046927SAndroid Build Coastguard Worker     * ssa_1 (wrmask=0xf) = ...
1748*61046927SAndroid Build Coastguard Worker     * ssa_2 = split ssa_1 off:0
1749*61046927SAndroid Build Coastguard Worker     * ssa_3 = split ssa_1 off:1
1750*61046927SAndroid Build Coastguard Worker     * ssa_4 (wrmask=0x3) = collect (kill)ssa_2, (kill)ssa_3
1751*61046927SAndroid Build Coastguard Worker     * ... = (kill)ssa_1
1752*61046927SAndroid Build Coastguard Worker     * ... = (kill)ssa_4
1753*61046927SAndroid Build Coastguard Worker     *
1754*61046927SAndroid Build Coastguard Worker     * ssa_4 will be coalesced with ssa_1 and needs to be allocated inside it.
1755*61046927SAndroid Build Coastguard Worker     */
1756*61046927SAndroid Build Coastguard Worker    physreg_t dst_fixed = (physreg_t)~0u;
1757*61046927SAndroid Build Coastguard Worker 
1758*61046927SAndroid Build Coastguard Worker    ra_foreach_src (src, instr) {
1759*61046927SAndroid Build Coastguard Worker       if (src->flags & IR3_REG_FIRST_KILL) {
1760*61046927SAndroid Build Coastguard Worker          mark_src_killed(ctx, src);
1761*61046927SAndroid Build Coastguard Worker       }
1762*61046927SAndroid Build Coastguard Worker 
1763*61046927SAndroid Build Coastguard Worker       struct ra_interval *interval = &ctx->intervals[src->def->name];
1764*61046927SAndroid Build Coastguard Worker 
1765*61046927SAndroid Build Coastguard Worker       /* We only need special handling if the source's interval overlaps with
1766*61046927SAndroid Build Coastguard Worker        * the destination's interval.
1767*61046927SAndroid Build Coastguard Worker        */
1768*61046927SAndroid Build Coastguard Worker       if (src->def->interval_start >= instr->dsts[0]->interval_end ||
1769*61046927SAndroid Build Coastguard Worker           instr->dsts[0]->interval_start >= src->def->interval_end ||
1770*61046927SAndroid Build Coastguard Worker           interval->is_killed)
1771*61046927SAndroid Build Coastguard Worker          continue;
1772*61046927SAndroid Build Coastguard Worker 
1773*61046927SAndroid Build Coastguard Worker       while (interval->interval.parent != NULL) {
1774*61046927SAndroid Build Coastguard Worker          interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
1775*61046927SAndroid Build Coastguard Worker       }
1776*61046927SAndroid Build Coastguard Worker       if (reg_size(interval->interval.reg) >= reg_size(instr->dsts[0])) {
1777*61046927SAndroid Build Coastguard Worker          dst_fixed = interval->physreg_start -
1778*61046927SAndroid Build Coastguard Worker                      interval->interval.reg->merge_set_offset + dst_offset;
1779*61046927SAndroid Build Coastguard Worker       } else {
1780*61046927SAndroid Build Coastguard Worker          /* For sources whose root interval is smaller than the
1781*61046927SAndroid Build Coastguard Worker           * destination (i.e. the normal case), we will shuffle them
1782*61046927SAndroid Build Coastguard Worker           * around after allocating the destination. Mark them killed so
1783*61046927SAndroid Build Coastguard Worker           * that the destination can be allocated over them, even if they
1784*61046927SAndroid Build Coastguard Worker           * aren't actually killed.
1785*61046927SAndroid Build Coastguard Worker           */
1786*61046927SAndroid Build Coastguard Worker          ra_file_mark_killed(ra_get_file(ctx, src), interval);
1787*61046927SAndroid Build Coastguard Worker       }
1788*61046927SAndroid Build Coastguard Worker    }
1789*61046927SAndroid Build Coastguard Worker 
1790*61046927SAndroid Build Coastguard Worker    if (dst_fixed != (physreg_t)~0u)
1791*61046927SAndroid Build Coastguard Worker       allocate_dst_fixed(ctx, instr->dsts[0], dst_fixed);
1792*61046927SAndroid Build Coastguard Worker    else
1793*61046927SAndroid Build Coastguard Worker       allocate_dst(ctx, instr->dsts[0]);
1794*61046927SAndroid Build Coastguard Worker 
1795*61046927SAndroid Build Coastguard Worker    /* Remove the temporary is_killed we added */
1796*61046927SAndroid Build Coastguard Worker    ra_foreach_src (src, instr) {
1797*61046927SAndroid Build Coastguard Worker       struct ra_interval *interval = &ctx->intervals[src->def->name];
1798*61046927SAndroid Build Coastguard Worker       while (interval->interval.parent != NULL) {
1799*61046927SAndroid Build Coastguard Worker          interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
1800*61046927SAndroid Build Coastguard Worker       }
1801*61046927SAndroid Build Coastguard Worker 
1802*61046927SAndroid Build Coastguard Worker       /* Filter out cases where it actually should be killed */
1803*61046927SAndroid Build Coastguard Worker       if (interval != &ctx->intervals[src->def->name] ||
1804*61046927SAndroid Build Coastguard Worker           !(src->flags & IR3_REG_KILL)) {
1805*61046927SAndroid Build Coastguard Worker          ra_file_unmark_killed(ra_get_file(ctx, src), interval);
1806*61046927SAndroid Build Coastguard Worker       }
1807*61046927SAndroid Build Coastguard Worker    }
1808*61046927SAndroid Build Coastguard Worker 
1809*61046927SAndroid Build Coastguard Worker    ra_foreach_src_rev (src, instr) {
1810*61046927SAndroid Build Coastguard Worker       assign_src(ctx, instr, src);
1811*61046927SAndroid Build Coastguard Worker    }
1812*61046927SAndroid Build Coastguard Worker 
1813*61046927SAndroid Build Coastguard Worker    /* We need to do this before insert_dst(), so that children of the
1814*61046927SAndroid Build Coastguard Worker     * destination which got marked as killed and then shuffled around to make
1815*61046927SAndroid Build Coastguard Worker     * space for the destination have the correct pcopy destination that
1816*61046927SAndroid Build Coastguard Worker     * matches what we assign the source of the collect to in assign_src().
1817*61046927SAndroid Build Coastguard Worker     *
1818*61046927SAndroid Build Coastguard Worker     * TODO: In this case we'll wind up copying the value in the pcopy and
1819*61046927SAndroid Build Coastguard Worker     * then again in the collect. We could avoid one of those by updating the
1820*61046927SAndroid Build Coastguard Worker     * pcopy destination to match up with the final location of the source
1821*61046927SAndroid Build Coastguard Worker     * after the collect and making the collect a no-op. However this doesn't
1822*61046927SAndroid Build Coastguard Worker     * seem to happen often.
1823*61046927SAndroid Build Coastguard Worker     */
1824*61046927SAndroid Build Coastguard Worker    insert_parallel_copy_instr(ctx, instr);
1825*61046927SAndroid Build Coastguard Worker 
1826*61046927SAndroid Build Coastguard Worker    /* Note: insert_dst will automatically shuffle around any intervals that
1827*61046927SAndroid Build Coastguard Worker     * are a child of the collect by making them children of the collect.
1828*61046927SAndroid Build Coastguard Worker     */
1829*61046927SAndroid Build Coastguard Worker 
1830*61046927SAndroid Build Coastguard Worker    insert_dst(ctx, instr->dsts[0]);
1831*61046927SAndroid Build Coastguard Worker }
1832*61046927SAndroid Build Coastguard Worker 
1833*61046927SAndroid Build Coastguard Worker /* Parallel copies before RA should only be at the end of the block, for
1834*61046927SAndroid Build Coastguard Worker  * phi's. For these we only need to fill in the sources, and then we fill in
1835*61046927SAndroid Build Coastguard Worker  * the destinations in the successor block.
1836*61046927SAndroid Build Coastguard Worker  */
1837*61046927SAndroid Build Coastguard Worker static void
handle_pcopy(struct ra_ctx * ctx,struct ir3_instruction * instr)1838*61046927SAndroid Build Coastguard Worker handle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr)
1839*61046927SAndroid Build Coastguard Worker {
1840*61046927SAndroid Build Coastguard Worker    ra_foreach_src_rev (src, instr) {
1841*61046927SAndroid Build Coastguard Worker       assign_src(ctx, instr, src);
1842*61046927SAndroid Build Coastguard Worker    }
1843*61046927SAndroid Build Coastguard Worker }
1844*61046927SAndroid Build Coastguard Worker 
1845*61046927SAndroid Build Coastguard Worker /* Some inputs may need to be precolored. We need to handle those first, so
1846*61046927SAndroid Build Coastguard Worker  * that other non-precolored inputs don't accidentally get allocated over
1847*61046927SAndroid Build Coastguard Worker  * them. Inputs are the very first thing in the shader, so it shouldn't be a
1848*61046927SAndroid Build Coastguard Worker  * problem to allocate them to a specific physreg.
1849*61046927SAndroid Build Coastguard Worker  */
1850*61046927SAndroid Build Coastguard Worker 
1851*61046927SAndroid Build Coastguard Worker static void
handle_precolored_input(struct ra_ctx * ctx,struct ir3_instruction * instr)1852*61046927SAndroid Build Coastguard Worker handle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1853*61046927SAndroid Build Coastguard Worker {
1854*61046927SAndroid Build Coastguard Worker    if (instr->dsts[0]->num == INVALID_REG ||
1855*61046927SAndroid Build Coastguard Worker        !(instr->dsts[0]->flags & IR3_REG_SSA))
1856*61046927SAndroid Build Coastguard Worker       return;
1857*61046927SAndroid Build Coastguard Worker 
1858*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1859*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1860*61046927SAndroid Build Coastguard Worker    physreg_t physreg = ra_reg_get_physreg(instr->dsts[0]);
1861*61046927SAndroid Build Coastguard Worker    allocate_dst_fixed(ctx, instr->dsts[0], physreg);
1862*61046927SAndroid Build Coastguard Worker 
1863*61046927SAndroid Build Coastguard Worker    d("insert precolored dst %u physreg %u", instr->dsts[0]->name,
1864*61046927SAndroid Build Coastguard Worker      ra_interval_get_physreg(interval));
1865*61046927SAndroid Build Coastguard Worker 
1866*61046927SAndroid Build Coastguard Worker    ra_file_insert(file, interval);
1867*61046927SAndroid Build Coastguard Worker    interval->frozen = true;
1868*61046927SAndroid Build Coastguard Worker }
1869*61046927SAndroid Build Coastguard Worker 
1870*61046927SAndroid Build Coastguard Worker static void
handle_input(struct ra_ctx * ctx,struct ir3_instruction * instr)1871*61046927SAndroid Build Coastguard Worker handle_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1872*61046927SAndroid Build Coastguard Worker {
1873*61046927SAndroid Build Coastguard Worker    if (instr->dsts[0]->num != INVALID_REG)
1874*61046927SAndroid Build Coastguard Worker       return;
1875*61046927SAndroid Build Coastguard Worker 
1876*61046927SAndroid Build Coastguard Worker    allocate_dst(ctx, instr->dsts[0]);
1877*61046927SAndroid Build Coastguard Worker 
1878*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1879*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1880*61046927SAndroid Build Coastguard Worker    ra_file_insert(file, interval);
1881*61046927SAndroid Build Coastguard Worker }
1882*61046927SAndroid Build Coastguard Worker 
1883*61046927SAndroid Build Coastguard Worker static void
assign_input(struct ra_ctx * ctx,struct ir3_instruction * instr)1884*61046927SAndroid Build Coastguard Worker assign_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1885*61046927SAndroid Build Coastguard Worker {
1886*61046927SAndroid Build Coastguard Worker    if (!(instr->dsts[0]->flags & IR3_REG_SSA))
1887*61046927SAndroid Build Coastguard Worker       return;
1888*61046927SAndroid Build Coastguard Worker 
1889*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1890*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1891*61046927SAndroid Build Coastguard Worker 
1892*61046927SAndroid Build Coastguard Worker    if (instr->dsts[0]->num == INVALID_REG) {
1893*61046927SAndroid Build Coastguard Worker       assign_reg(instr, instr->dsts[0], ra_interval_get_num(interval));
1894*61046927SAndroid Build Coastguard Worker    } else {
1895*61046927SAndroid Build Coastguard Worker       interval->frozen = false;
1896*61046927SAndroid Build Coastguard Worker    }
1897*61046927SAndroid Build Coastguard Worker 
1898*61046927SAndroid Build Coastguard Worker    if (instr->dsts[0]->flags & IR3_REG_UNUSED)
1899*61046927SAndroid Build Coastguard Worker       ra_file_remove(file, interval);
1900*61046927SAndroid Build Coastguard Worker 
1901*61046927SAndroid Build Coastguard Worker    ra_foreach_src_rev (src, instr)
1902*61046927SAndroid Build Coastguard Worker       assign_src(ctx, instr, src);
1903*61046927SAndroid Build Coastguard Worker }
1904*61046927SAndroid Build Coastguard Worker 
1905*61046927SAndroid Build Coastguard Worker /* chmask is a bit weird, because it has pre-colored sources due to the need
1906*61046927SAndroid Build Coastguard Worker  * to pass some registers to the next stage. Fortunately there are only at
1907*61046927SAndroid Build Coastguard Worker  * most two, and there should be no other live values by the time we get to
1908*61046927SAndroid Build Coastguard Worker  * this instruction, so we only have to do the minimum and don't need any
1909*61046927SAndroid Build Coastguard Worker  * fancy fallbacks.
1910*61046927SAndroid Build Coastguard Worker  *
1911*61046927SAndroid Build Coastguard Worker  * TODO: Add more complete handling of precolored sources, e.g. for function
1912*61046927SAndroid Build Coastguard Worker  * argument handling. We'd need a way to mark sources as fixed so that they
1913*61046927SAndroid Build Coastguard Worker  * don't get moved around when placing other sources in the fallback case, and
1914*61046927SAndroid Build Coastguard Worker  * a duplication of much of the logic in get_reg(). This also opens another
1915*61046927SAndroid Build Coastguard Worker  * can of worms, e.g. what if the precolored source is a split of a vector
1916*61046927SAndroid Build Coastguard Worker  * which is still live -- this breaks our assumption that splits don't incur
1917*61046927SAndroid Build Coastguard Worker  * any "extra" register requirements and we'd have to break it out of the
1918*61046927SAndroid Build Coastguard Worker  * parent ra_interval.
1919*61046927SAndroid Build Coastguard Worker  */
1920*61046927SAndroid Build Coastguard Worker 
1921*61046927SAndroid Build Coastguard Worker static void
handle_precolored_source(struct ra_ctx * ctx,struct ir3_register * src)1922*61046927SAndroid Build Coastguard Worker handle_precolored_source(struct ra_ctx *ctx, struct ir3_register *src)
1923*61046927SAndroid Build Coastguard Worker {
1924*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, src);
1925*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[src->def->name];
1926*61046927SAndroid Build Coastguard Worker    physreg_t physreg = ra_reg_get_physreg(src);
1927*61046927SAndroid Build Coastguard Worker 
1928*61046927SAndroid Build Coastguard Worker    if (ra_interval_get_num(interval) == src->num)
1929*61046927SAndroid Build Coastguard Worker       return;
1930*61046927SAndroid Build Coastguard Worker 
1931*61046927SAndroid Build Coastguard Worker    /* Try evicting stuff in our way if it isn't free. This won't move
1932*61046927SAndroid Build Coastguard Worker     * anything unless it overlaps with our precolored physreg, so we don't
1933*61046927SAndroid Build Coastguard Worker     * have to worry about evicting other precolored sources.
1934*61046927SAndroid Build Coastguard Worker     */
1935*61046927SAndroid Build Coastguard Worker    if (!get_reg_specified(ctx, file, src, physreg, true)) {
1936*61046927SAndroid Build Coastguard Worker       unsigned eviction_count;
1937*61046927SAndroid Build Coastguard Worker       if (!try_evict_regs(ctx, file, src, physreg, &eviction_count, true,
1938*61046927SAndroid Build Coastguard Worker                           false)) {
1939*61046927SAndroid Build Coastguard Worker          unreachable("failed to evict for precolored source!");
1940*61046927SAndroid Build Coastguard Worker          return;
1941*61046927SAndroid Build Coastguard Worker       }
1942*61046927SAndroid Build Coastguard Worker    }
1943*61046927SAndroid Build Coastguard Worker 
1944*61046927SAndroid Build Coastguard Worker    ra_move_interval(ctx, file, interval, physreg);
1945*61046927SAndroid Build Coastguard Worker }
1946*61046927SAndroid Build Coastguard Worker 
1947*61046927SAndroid Build Coastguard Worker static void
handle_chmask(struct ra_ctx * ctx,struct ir3_instruction * instr)1948*61046927SAndroid Build Coastguard Worker handle_chmask(struct ra_ctx *ctx, struct ir3_instruction *instr)
1949*61046927SAndroid Build Coastguard Worker {
1950*61046927SAndroid Build Coastguard Worker    /* Note: we purposely don't mark sources as killed, so that we can reuse
1951*61046927SAndroid Build Coastguard Worker     * some of the get_reg() machinery as-if the source is a destination.
1952*61046927SAndroid Build Coastguard Worker     * Marking it as killed would make e.g. get_reg_specified() wouldn't work
1953*61046927SAndroid Build Coastguard Worker     * correctly.
1954*61046927SAndroid Build Coastguard Worker     */
1955*61046927SAndroid Build Coastguard Worker    ra_foreach_src (src, instr) {
1956*61046927SAndroid Build Coastguard Worker       assert(src->num != INVALID_REG);
1957*61046927SAndroid Build Coastguard Worker       handle_precolored_source(ctx, src);
1958*61046927SAndroid Build Coastguard Worker    }
1959*61046927SAndroid Build Coastguard Worker 
1960*61046927SAndroid Build Coastguard Worker    ra_foreach_src (src, instr) {
1961*61046927SAndroid Build Coastguard Worker       struct ra_file *file = ra_get_file(ctx, src);
1962*61046927SAndroid Build Coastguard Worker       struct ra_interval *interval = &ctx->intervals[src->def->name];
1963*61046927SAndroid Build Coastguard Worker       if (src->flags & IR3_REG_FIRST_KILL)
1964*61046927SAndroid Build Coastguard Worker          ra_file_remove(file, interval);
1965*61046927SAndroid Build Coastguard Worker    }
1966*61046927SAndroid Build Coastguard Worker 
1967*61046927SAndroid Build Coastguard Worker    insert_parallel_copy_instr(ctx, instr);
1968*61046927SAndroid Build Coastguard Worker }
1969*61046927SAndroid Build Coastguard Worker 
1970*61046927SAndroid Build Coastguard Worker static physreg_t
read_register(struct ra_ctx * ctx,struct ir3_block * block,struct ir3_register * def)1971*61046927SAndroid Build Coastguard Worker read_register(struct ra_ctx *ctx, struct ir3_block *block,
1972*61046927SAndroid Build Coastguard Worker               struct ir3_register *def)
1973*61046927SAndroid Build Coastguard Worker {
1974*61046927SAndroid Build Coastguard Worker    struct ra_block_state *state = &ctx->blocks[block->index];
1975*61046927SAndroid Build Coastguard Worker    if (state->renames) {
1976*61046927SAndroid Build Coastguard Worker       struct hash_entry *entry = _mesa_hash_table_search(state->renames, def);
1977*61046927SAndroid Build Coastguard Worker       if (entry) {
1978*61046927SAndroid Build Coastguard Worker          return (physreg_t)(uintptr_t)entry->data;
1979*61046927SAndroid Build Coastguard Worker       }
1980*61046927SAndroid Build Coastguard Worker    }
1981*61046927SAndroid Build Coastguard Worker 
1982*61046927SAndroid Build Coastguard Worker    return ra_reg_get_physreg(def);
1983*61046927SAndroid Build Coastguard Worker }
1984*61046927SAndroid Build Coastguard Worker 
1985*61046927SAndroid Build Coastguard Worker static void
handle_live_in(struct ra_ctx * ctx,struct ir3_register * def)1986*61046927SAndroid Build Coastguard Worker handle_live_in(struct ra_ctx *ctx, struct ir3_register *def)
1987*61046927SAndroid Build Coastguard Worker {
1988*61046927SAndroid Build Coastguard Worker    physreg_t physreg = ~0;
1989*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
1990*61046927SAndroid Build Coastguard Worker       struct ir3_block *pred = ctx->block->predecessors[i];
1991*61046927SAndroid Build Coastguard Worker       struct ra_block_state *pred_state = &ctx->blocks[pred->index];
1992*61046927SAndroid Build Coastguard Worker 
1993*61046927SAndroid Build Coastguard Worker       if (!pred_state->visited)
1994*61046927SAndroid Build Coastguard Worker          continue;
1995*61046927SAndroid Build Coastguard Worker 
1996*61046927SAndroid Build Coastguard Worker       physreg = read_register(ctx, pred, def);
1997*61046927SAndroid Build Coastguard Worker       break;
1998*61046927SAndroid Build Coastguard Worker    }
1999*61046927SAndroid Build Coastguard Worker 
2000*61046927SAndroid Build Coastguard Worker    assert(physreg != (physreg_t)~0);
2001*61046927SAndroid Build Coastguard Worker 
2002*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[def->name];
2003*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, def);
2004*61046927SAndroid Build Coastguard Worker    ra_interval_init(interval, def);
2005*61046927SAndroid Build Coastguard Worker    interval->physreg_start = physreg;
2006*61046927SAndroid Build Coastguard Worker    interval->physreg_end = physreg + reg_size(def);
2007*61046927SAndroid Build Coastguard Worker    ra_file_insert(file, interval);
2008*61046927SAndroid Build Coastguard Worker }
2009*61046927SAndroid Build Coastguard Worker 
2010*61046927SAndroid Build Coastguard Worker static void
handle_live_out(struct ra_ctx * ctx,struct ir3_register * def)2011*61046927SAndroid Build Coastguard Worker handle_live_out(struct ra_ctx *ctx, struct ir3_register *def)
2012*61046927SAndroid Build Coastguard Worker {
2013*61046927SAndroid Build Coastguard Worker    /* Skip parallelcopy's which in the original program are only used as phi
2014*61046927SAndroid Build Coastguard Worker     * arguments. Even though phi arguments are live out, they are only
2015*61046927SAndroid Build Coastguard Worker     * assigned when the phi is.
2016*61046927SAndroid Build Coastguard Worker     */
2017*61046927SAndroid Build Coastguard Worker    if (def->instr->opc == OPC_META_PARALLEL_COPY)
2018*61046927SAndroid Build Coastguard Worker       return;
2019*61046927SAndroid Build Coastguard Worker 
2020*61046927SAndroid Build Coastguard Worker    struct ra_block_state *state = &ctx->blocks[ctx->block->index];
2021*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[def->name];
2022*61046927SAndroid Build Coastguard Worker    physreg_t physreg = ra_interval_get_physreg(interval);
2023*61046927SAndroid Build Coastguard Worker    if (physreg != ra_reg_get_physreg(def)) {
2024*61046927SAndroid Build Coastguard Worker       if (!state->renames)
2025*61046927SAndroid Build Coastguard Worker          state->renames = _mesa_pointer_hash_table_create(ctx);
2026*61046927SAndroid Build Coastguard Worker       _mesa_hash_table_insert(state->renames, def, (void *)(uintptr_t)physreg);
2027*61046927SAndroid Build Coastguard Worker    }
2028*61046927SAndroid Build Coastguard Worker }
2029*61046927SAndroid Build Coastguard Worker 
2030*61046927SAndroid Build Coastguard Worker static void
handle_phi(struct ra_ctx * ctx,struct ir3_register * def)2031*61046927SAndroid Build Coastguard Worker handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
2032*61046927SAndroid Build Coastguard Worker {
2033*61046927SAndroid Build Coastguard Worker    if (!(def->flags & IR3_REG_SSA))
2034*61046927SAndroid Build Coastguard Worker       return;
2035*61046927SAndroid Build Coastguard Worker 
2036*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, def);
2037*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[def->name];
2038*61046927SAndroid Build Coastguard Worker 
2039*61046927SAndroid Build Coastguard Worker    /* phis are always scalar, so they should already be the smallest possible
2040*61046927SAndroid Build Coastguard Worker     * size. However they may be coalesced with other live-in values/phi
2041*61046927SAndroid Build Coastguard Worker     * nodes, so check for that here.
2042*61046927SAndroid Build Coastguard Worker     */
2043*61046927SAndroid Build Coastguard Worker    struct ir3_reg_interval *parent_ir3 =
2044*61046927SAndroid Build Coastguard Worker       ir3_reg_interval_search(&file->reg_ctx.intervals, def->interval_start);
2045*61046927SAndroid Build Coastguard Worker    physreg_t physreg;
2046*61046927SAndroid Build Coastguard Worker    if (parent_ir3) {
2047*61046927SAndroid Build Coastguard Worker       struct ra_interval *parent = ir3_reg_interval_to_ra_interval(parent_ir3);
2048*61046927SAndroid Build Coastguard Worker       physreg = ra_interval_get_physreg(parent) +
2049*61046927SAndroid Build Coastguard Worker                 (def->interval_start - parent_ir3->reg->interval_start);
2050*61046927SAndroid Build Coastguard Worker    } else {
2051*61046927SAndroid Build Coastguard Worker       physreg = get_reg(ctx, file, def);
2052*61046927SAndroid Build Coastguard Worker    }
2053*61046927SAndroid Build Coastguard Worker 
2054*61046927SAndroid Build Coastguard Worker    allocate_dst_fixed(ctx, def, physreg);
2055*61046927SAndroid Build Coastguard Worker 
2056*61046927SAndroid Build Coastguard Worker    ra_file_insert(file, interval);
2057*61046927SAndroid Build Coastguard Worker }
2058*61046927SAndroid Build Coastguard Worker 
2059*61046927SAndroid Build Coastguard Worker static void
assign_phi(struct ra_ctx * ctx,struct ir3_instruction * phi)2060*61046927SAndroid Build Coastguard Worker assign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi)
2061*61046927SAndroid Build Coastguard Worker {
2062*61046927SAndroid Build Coastguard Worker    if (!(phi->dsts[0]->flags & IR3_REG_SSA))
2063*61046927SAndroid Build Coastguard Worker       return;
2064*61046927SAndroid Build Coastguard Worker 
2065*61046927SAndroid Build Coastguard Worker    struct ra_file *file = ra_get_file(ctx, phi->dsts[0]);
2066*61046927SAndroid Build Coastguard Worker    struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name];
2067*61046927SAndroid Build Coastguard Worker    assert(!interval->interval.parent);
2068*61046927SAndroid Build Coastguard Worker    unsigned num = ra_interval_get_num(interval);
2069*61046927SAndroid Build Coastguard Worker    assign_reg(phi, phi->dsts[0], num);
2070*61046927SAndroid Build Coastguard Worker 
2071*61046927SAndroid Build Coastguard Worker    /* Assign the parallelcopy sources of this phi */
2072*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < phi->srcs_count; i++) {
2073*61046927SAndroid Build Coastguard Worker       if (phi->srcs[i]->def) {
2074*61046927SAndroid Build Coastguard Worker          assign_reg(phi, phi->srcs[i], num);
2075*61046927SAndroid Build Coastguard Worker          assign_reg(phi, phi->srcs[i]->def, num);
2076*61046927SAndroid Build Coastguard Worker       }
2077*61046927SAndroid Build Coastguard Worker    }
2078*61046927SAndroid Build Coastguard Worker 
2079*61046927SAndroid Build Coastguard Worker    if (phi->dsts[0]->flags & IR3_REG_UNUSED)
2080*61046927SAndroid Build Coastguard Worker       ra_file_remove(file, interval);
2081*61046927SAndroid Build Coastguard Worker }
2082*61046927SAndroid Build Coastguard Worker 
2083*61046927SAndroid Build Coastguard Worker /* When we split a live range, we sometimes need to emit fixup code at the end
2084*61046927SAndroid Build Coastguard Worker  * of a block. For example, something like:
2085*61046927SAndroid Build Coastguard Worker  *
2086*61046927SAndroid Build Coastguard Worker  * a = ...
2087*61046927SAndroid Build Coastguard Worker  * if (...) {
2088*61046927SAndroid Build Coastguard Worker  *    ...
2089*61046927SAndroid Build Coastguard Worker  *    a' = a
2090*61046927SAndroid Build Coastguard Worker  *    b = ... // a evicted to make room for b
2091*61046927SAndroid Build Coastguard Worker  *    ...
2092*61046927SAndroid Build Coastguard Worker  * }
2093*61046927SAndroid Build Coastguard Worker  * ... = a
2094*61046927SAndroid Build Coastguard Worker  *
2095*61046927SAndroid Build Coastguard Worker  * When we insert the copy to a' in insert_parallel_copy_instr(), this forces
2096*61046927SAndroid Build Coastguard Worker  * to insert another copy "a = a'" at the end of the if. Normally this would
2097*61046927SAndroid Build Coastguard Worker  * also entail adding a phi node, but since we're about to go out of SSA
2098*61046927SAndroid Build Coastguard Worker  * anyway we just insert an extra move. Note, however, that "b" might be used
2099*61046927SAndroid Build Coastguard Worker  * in a phi node at the end of the if and share registers with "a", so we
2100*61046927SAndroid Build Coastguard Worker  * have to be careful to extend any preexisting parallelcopy instruction
2101*61046927SAndroid Build Coastguard Worker  * instead of creating our own in order to guarantee that they properly get
2102*61046927SAndroid Build Coastguard Worker  * swapped.
2103*61046927SAndroid Build Coastguard Worker  */
2104*61046927SAndroid Build Coastguard Worker 
2105*61046927SAndroid Build Coastguard Worker static void
insert_liveout_copy(struct ir3_block * block,physreg_t dst,physreg_t src,struct ir3_register * reg)2106*61046927SAndroid Build Coastguard Worker insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
2107*61046927SAndroid Build Coastguard Worker                     struct ir3_register *reg)
2108*61046927SAndroid Build Coastguard Worker {
2109*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *old_pcopy = NULL;
2110*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *last = ir3_block_get_last_non_terminator(block);
2111*61046927SAndroid Build Coastguard Worker 
2112*61046927SAndroid Build Coastguard Worker    if (last && last->opc == OPC_META_PARALLEL_COPY)
2113*61046927SAndroid Build Coastguard Worker       old_pcopy = last;
2114*61046927SAndroid Build Coastguard Worker 
2115*61046927SAndroid Build Coastguard Worker    unsigned old_pcopy_srcs = old_pcopy ? old_pcopy->srcs_count : 0;
2116*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *pcopy = ir3_instr_create(
2117*61046927SAndroid Build Coastguard Worker       block, OPC_META_PARALLEL_COPY, old_pcopy_srcs + 1, old_pcopy_srcs + 1);
2118*61046927SAndroid Build Coastguard Worker 
2119*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < old_pcopy_srcs; i++) {
2120*61046927SAndroid Build Coastguard Worker       old_pcopy->dsts[i]->instr = pcopy;
2121*61046927SAndroid Build Coastguard Worker       pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i];
2122*61046927SAndroid Build Coastguard Worker    }
2123*61046927SAndroid Build Coastguard Worker 
2124*61046927SAndroid Build Coastguard Worker    unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
2125*61046927SAndroid Build Coastguard Worker 
2126*61046927SAndroid Build Coastguard Worker    struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags);
2127*61046927SAndroid Build Coastguard Worker    dst_reg->wrmask = reg->wrmask;
2128*61046927SAndroid Build Coastguard Worker    dst_reg->size = reg->size;
2129*61046927SAndroid Build Coastguard Worker    assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
2130*61046927SAndroid Build Coastguard Worker 
2131*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < old_pcopy_srcs; i++) {
2132*61046927SAndroid Build Coastguard Worker       pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i];
2133*61046927SAndroid Build Coastguard Worker    }
2134*61046927SAndroid Build Coastguard Worker 
2135*61046927SAndroid Build Coastguard Worker    struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags);
2136*61046927SAndroid Build Coastguard Worker    src_reg->wrmask = reg->wrmask;
2137*61046927SAndroid Build Coastguard Worker    src_reg->size = reg->size;
2138*61046927SAndroid Build Coastguard Worker    assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
2139*61046927SAndroid Build Coastguard Worker 
2140*61046927SAndroid Build Coastguard Worker    if (old_pcopy)
2141*61046927SAndroid Build Coastguard Worker       list_del(&old_pcopy->node);
2142*61046927SAndroid Build Coastguard Worker }
2143*61046927SAndroid Build Coastguard Worker 
2144*61046927SAndroid Build Coastguard Worker static void
insert_live_in_move(struct ra_ctx * ctx,struct ra_interval * interval)2145*61046927SAndroid Build Coastguard Worker insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
2146*61046927SAndroid Build Coastguard Worker {
2147*61046927SAndroid Build Coastguard Worker    physreg_t physreg = ra_interval_get_physreg(interval);
2148*61046927SAndroid Build Coastguard Worker 
2149*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
2150*61046927SAndroid Build Coastguard Worker       struct ir3_block *pred = ctx->block->predecessors[i];
2151*61046927SAndroid Build Coastguard Worker       struct ra_block_state *pred_state = &ctx->blocks[pred->index];
2152*61046927SAndroid Build Coastguard Worker 
2153*61046927SAndroid Build Coastguard Worker       if (!pred_state->visited)
2154*61046927SAndroid Build Coastguard Worker          continue;
2155*61046927SAndroid Build Coastguard Worker 
2156*61046927SAndroid Build Coastguard Worker       physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
2157*61046927SAndroid Build Coastguard Worker       if (pred_reg != physreg) {
2158*61046927SAndroid Build Coastguard Worker          assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
2159*61046927SAndroid Build Coastguard Worker          insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
2160*61046927SAndroid Build Coastguard Worker       }
2161*61046927SAndroid Build Coastguard Worker    }
2162*61046927SAndroid Build Coastguard Worker }
2163*61046927SAndroid Build Coastguard Worker 
2164*61046927SAndroid Build Coastguard Worker static void
insert_file_live_in_moves(struct ra_ctx * ctx,struct ra_file * file)2165*61046927SAndroid Build Coastguard Worker insert_file_live_in_moves(struct ra_ctx *ctx, struct ra_file *file)
2166*61046927SAndroid Build Coastguard Worker {
2167*61046927SAndroid Build Coastguard Worker    BITSET_WORD *live_in = ctx->live->live_in[ctx->block->index];
2168*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2169*61046927SAndroid Build Coastguard Worker                     physreg_node) {
2170*61046927SAndroid Build Coastguard Worker       /* Skip phi nodes. This needs to happen after phi nodes are allocated,
2171*61046927SAndroid Build Coastguard Worker        * because we may have to move live-ins around to make space for phi
2172*61046927SAndroid Build Coastguard Worker        * nodes, but we shouldn't be handling phi nodes here.
2173*61046927SAndroid Build Coastguard Worker        */
2174*61046927SAndroid Build Coastguard Worker       if (BITSET_TEST(live_in, interval->interval.reg->name))
2175*61046927SAndroid Build Coastguard Worker          insert_live_in_move(ctx, interval);
2176*61046927SAndroid Build Coastguard Worker    }
2177*61046927SAndroid Build Coastguard Worker }
2178*61046927SAndroid Build Coastguard Worker 
2179*61046927SAndroid Build Coastguard Worker static void
insert_entry_regs(struct ra_block_state * state,struct ra_file * file)2180*61046927SAndroid Build Coastguard Worker insert_entry_regs(struct ra_block_state *state, struct ra_file *file)
2181*61046927SAndroid Build Coastguard Worker {
2182*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2183*61046927SAndroid Build Coastguard Worker                     physreg_node) {
2184*61046927SAndroid Build Coastguard Worker       _mesa_hash_table_insert(state->entry_regs, interval->interval.reg,
2185*61046927SAndroid Build Coastguard Worker                               (void *)(uintptr_t)interval->physreg_start);
2186*61046927SAndroid Build Coastguard Worker    }
2187*61046927SAndroid Build Coastguard Worker }
2188*61046927SAndroid Build Coastguard Worker 
2189*61046927SAndroid Build Coastguard Worker static void
insert_live_in_moves(struct ra_ctx * ctx)2190*61046927SAndroid Build Coastguard Worker insert_live_in_moves(struct ra_ctx *ctx)
2191*61046927SAndroid Build Coastguard Worker {
2192*61046927SAndroid Build Coastguard Worker    insert_file_live_in_moves(ctx, &ctx->full);
2193*61046927SAndroid Build Coastguard Worker    insert_file_live_in_moves(ctx, &ctx->half);
2194*61046927SAndroid Build Coastguard Worker    insert_file_live_in_moves(ctx, &ctx->shared);
2195*61046927SAndroid Build Coastguard Worker 
2196*61046927SAndroid Build Coastguard Worker    /* If not all predecessors are visited, insert live-in regs so that
2197*61046927SAndroid Build Coastguard Worker     * insert_live_out_moves() will work.
2198*61046927SAndroid Build Coastguard Worker     */
2199*61046927SAndroid Build Coastguard Worker    bool all_preds_visited = true;
2200*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
2201*61046927SAndroid Build Coastguard Worker       if (!ctx->blocks[ctx->block->predecessors[i]->index].visited) {
2202*61046927SAndroid Build Coastguard Worker          all_preds_visited = false;
2203*61046927SAndroid Build Coastguard Worker          break;
2204*61046927SAndroid Build Coastguard Worker       }
2205*61046927SAndroid Build Coastguard Worker    }
2206*61046927SAndroid Build Coastguard Worker 
2207*61046927SAndroid Build Coastguard Worker    if (!all_preds_visited) {
2208*61046927SAndroid Build Coastguard Worker       struct ra_block_state *state = &ctx->blocks[ctx->block->index];
2209*61046927SAndroid Build Coastguard Worker       state->entry_regs = _mesa_pointer_hash_table_create(ctx);
2210*61046927SAndroid Build Coastguard Worker 
2211*61046927SAndroid Build Coastguard Worker       insert_entry_regs(state, &ctx->full);
2212*61046927SAndroid Build Coastguard Worker       insert_entry_regs(state, &ctx->half);
2213*61046927SAndroid Build Coastguard Worker       insert_entry_regs(state, &ctx->shared);
2214*61046927SAndroid Build Coastguard Worker    }
2215*61046927SAndroid Build Coastguard Worker }
2216*61046927SAndroid Build Coastguard Worker 
2217*61046927SAndroid Build Coastguard Worker static void
insert_live_out_move(struct ra_ctx * ctx,struct ra_interval * interval)2218*61046927SAndroid Build Coastguard Worker insert_live_out_move(struct ra_ctx *ctx, struct ra_interval *interval)
2219*61046927SAndroid Build Coastguard Worker {
2220*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2221*61046927SAndroid Build Coastguard Worker       if (!ctx->block->successors[i])
2222*61046927SAndroid Build Coastguard Worker          continue;
2223*61046927SAndroid Build Coastguard Worker 
2224*61046927SAndroid Build Coastguard Worker       struct ir3_block *succ = ctx->block->successors[i];
2225*61046927SAndroid Build Coastguard Worker       struct ra_block_state *succ_state = &ctx->blocks[succ->index];
2226*61046927SAndroid Build Coastguard Worker 
2227*61046927SAndroid Build Coastguard Worker       if (!succ_state->visited)
2228*61046927SAndroid Build Coastguard Worker          continue;
2229*61046927SAndroid Build Coastguard Worker 
2230*61046927SAndroid Build Coastguard Worker       struct hash_entry *entry = _mesa_hash_table_search(
2231*61046927SAndroid Build Coastguard Worker          succ_state->entry_regs, interval->interval.reg);
2232*61046927SAndroid Build Coastguard Worker       if (!entry)
2233*61046927SAndroid Build Coastguard Worker          continue;
2234*61046927SAndroid Build Coastguard Worker 
2235*61046927SAndroid Build Coastguard Worker       physreg_t new_reg = (physreg_t)(uintptr_t)entry->data;
2236*61046927SAndroid Build Coastguard Worker       if (new_reg != interval->physreg_start) {
2237*61046927SAndroid Build Coastguard Worker          insert_liveout_copy(ctx->block, new_reg, interval->physreg_start,
2238*61046927SAndroid Build Coastguard Worker                              interval->interval.reg);
2239*61046927SAndroid Build Coastguard Worker       }
2240*61046927SAndroid Build Coastguard Worker    }
2241*61046927SAndroid Build Coastguard Worker }
2242*61046927SAndroid Build Coastguard Worker 
2243*61046927SAndroid Build Coastguard Worker static void
insert_file_live_out_moves(struct ra_ctx * ctx,struct ra_file * file)2244*61046927SAndroid Build Coastguard Worker insert_file_live_out_moves(struct ra_ctx *ctx, struct ra_file *file)
2245*61046927SAndroid Build Coastguard Worker {
2246*61046927SAndroid Build Coastguard Worker    rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2247*61046927SAndroid Build Coastguard Worker                     physreg_node) {
2248*61046927SAndroid Build Coastguard Worker       insert_live_out_move(ctx, interval);
2249*61046927SAndroid Build Coastguard Worker    }
2250*61046927SAndroid Build Coastguard Worker }
2251*61046927SAndroid Build Coastguard Worker 
2252*61046927SAndroid Build Coastguard Worker static void
insert_live_out_moves(struct ra_ctx * ctx)2253*61046927SAndroid Build Coastguard Worker insert_live_out_moves(struct ra_ctx *ctx)
2254*61046927SAndroid Build Coastguard Worker {
2255*61046927SAndroid Build Coastguard Worker    insert_file_live_out_moves(ctx, &ctx->full);
2256*61046927SAndroid Build Coastguard Worker    insert_file_live_out_moves(ctx, &ctx->half);
2257*61046927SAndroid Build Coastguard Worker    insert_file_live_out_moves(ctx, &ctx->shared);
2258*61046927SAndroid Build Coastguard Worker }
2259*61046927SAndroid Build Coastguard Worker 
2260*61046927SAndroid Build Coastguard Worker static void
handle_block(struct ra_ctx * ctx,struct ir3_block * block)2261*61046927SAndroid Build Coastguard Worker handle_block(struct ra_ctx *ctx, struct ir3_block *block)
2262*61046927SAndroid Build Coastguard Worker {
2263*61046927SAndroid Build Coastguard Worker    ctx->block = block;
2264*61046927SAndroid Build Coastguard Worker 
2265*61046927SAndroid Build Coastguard Worker    /* Reset the register files from the last block */
2266*61046927SAndroid Build Coastguard Worker    ra_file_init(&ctx->full);
2267*61046927SAndroid Build Coastguard Worker    ra_file_init(&ctx->half);
2268*61046927SAndroid Build Coastguard Worker    ra_file_init(&ctx->shared);
2269*61046927SAndroid Build Coastguard Worker 
2270*61046927SAndroid Build Coastguard Worker    /* Handle live-ins, phis, and input meta-instructions. These all appear
2271*61046927SAndroid Build Coastguard Worker     * live at the beginning of the block, and interfere with each other
2272*61046927SAndroid Build Coastguard Worker     * therefore need to be allocated "in parallel". This means that we
2273*61046927SAndroid Build Coastguard Worker     * have to allocate all of them, inserting them into the file, and then
2274*61046927SAndroid Build Coastguard Worker     * delay updating the IR until all of them are allocated.
2275*61046927SAndroid Build Coastguard Worker     *
2276*61046927SAndroid Build Coastguard Worker     * Handle precolored inputs first, because we need to make sure that other
2277*61046927SAndroid Build Coastguard Worker     * inputs don't overwrite them. We shouldn't have both live-ins/phi nodes
2278*61046927SAndroid Build Coastguard Worker     * and inputs at the same time, because the first block doesn't have
2279*61046927SAndroid Build Coastguard Worker     * predecessors. Therefore handle_live_in doesn't have to worry about
2280*61046927SAndroid Build Coastguard Worker     * them.
2281*61046927SAndroid Build Coastguard Worker     */
2282*61046927SAndroid Build Coastguard Worker 
2283*61046927SAndroid Build Coastguard Worker    foreach_instr (instr, &block->instr_list) {
2284*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_META_INPUT)
2285*61046927SAndroid Build Coastguard Worker          handle_precolored_input(ctx, instr);
2286*61046927SAndroid Build Coastguard Worker       else
2287*61046927SAndroid Build Coastguard Worker          break;
2288*61046927SAndroid Build Coastguard Worker    }
2289*61046927SAndroid Build Coastguard Worker 
2290*61046927SAndroid Build Coastguard Worker    unsigned name;
2291*61046927SAndroid Build Coastguard Worker    BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
2292*61046927SAndroid Build Coastguard Worker                        ctx->live->definitions_count) {
2293*61046927SAndroid Build Coastguard Worker       struct ir3_register *reg = ctx->live->definitions[name];
2294*61046927SAndroid Build Coastguard Worker       handle_live_in(ctx, reg);
2295*61046927SAndroid Build Coastguard Worker    }
2296*61046927SAndroid Build Coastguard Worker 
2297*61046927SAndroid Build Coastguard Worker    foreach_instr (instr, &block->instr_list) {
2298*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_META_PHI)
2299*61046927SAndroid Build Coastguard Worker          handle_phi(ctx, instr->dsts[0]);
2300*61046927SAndroid Build Coastguard Worker       else if (instr->opc == OPC_META_INPUT ||
2301*61046927SAndroid Build Coastguard Worker                instr->opc == OPC_META_TEX_PREFETCH)
2302*61046927SAndroid Build Coastguard Worker          handle_input(ctx, instr);
2303*61046927SAndroid Build Coastguard Worker       else
2304*61046927SAndroid Build Coastguard Worker          break;
2305*61046927SAndroid Build Coastguard Worker    }
2306*61046927SAndroid Build Coastguard Worker 
2307*61046927SAndroid Build Coastguard Worker    /* After this point, every live-in/phi/input has an interval assigned to
2308*61046927SAndroid Build Coastguard Worker     * it. We delay actually assigning values until everything has been
2309*61046927SAndroid Build Coastguard Worker     * allocated, so we can simply ignore any parallel copy entries created
2310*61046927SAndroid Build Coastguard Worker     * when shuffling them around.
2311*61046927SAndroid Build Coastguard Worker     */
2312*61046927SAndroid Build Coastguard Worker    ctx->parallel_copies_count = 0;
2313*61046927SAndroid Build Coastguard Worker 
2314*61046927SAndroid Build Coastguard Worker    insert_live_in_moves(ctx);
2315*61046927SAndroid Build Coastguard Worker 
2316*61046927SAndroid Build Coastguard Worker    if (RA_DEBUG) {
2317*61046927SAndroid Build Coastguard Worker       d("after live-in block %u:\n", block->index);
2318*61046927SAndroid Build Coastguard Worker       ra_ctx_dump(ctx);
2319*61046927SAndroid Build Coastguard Worker    }
2320*61046927SAndroid Build Coastguard Worker 
2321*61046927SAndroid Build Coastguard Worker    /* Now we're done with processing live-ins, and can handle the body of the
2322*61046927SAndroid Build Coastguard Worker     * block.
2323*61046927SAndroid Build Coastguard Worker     */
2324*61046927SAndroid Build Coastguard Worker    foreach_instr (instr, &block->instr_list) {
2325*61046927SAndroid Build Coastguard Worker       di(instr, "processing");
2326*61046927SAndroid Build Coastguard Worker 
2327*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_META_PHI)
2328*61046927SAndroid Build Coastguard Worker          assign_phi(ctx, instr);
2329*61046927SAndroid Build Coastguard Worker       else if (instr->opc == OPC_META_INPUT ||
2330*61046927SAndroid Build Coastguard Worker                instr->opc == OPC_META_TEX_PREFETCH)
2331*61046927SAndroid Build Coastguard Worker          assign_input(ctx, instr);
2332*61046927SAndroid Build Coastguard Worker       else if (instr->opc == OPC_META_SPLIT)
2333*61046927SAndroid Build Coastguard Worker          handle_split(ctx, instr);
2334*61046927SAndroid Build Coastguard Worker       else if (instr->opc == OPC_META_COLLECT)
2335*61046927SAndroid Build Coastguard Worker          handle_collect(ctx, instr);
2336*61046927SAndroid Build Coastguard Worker       else if (instr->opc == OPC_META_PARALLEL_COPY)
2337*61046927SAndroid Build Coastguard Worker          handle_pcopy(ctx, instr);
2338*61046927SAndroid Build Coastguard Worker       else if (instr->opc == OPC_CHMASK)
2339*61046927SAndroid Build Coastguard Worker          handle_chmask(ctx, instr);
2340*61046927SAndroid Build Coastguard Worker       else
2341*61046927SAndroid Build Coastguard Worker          handle_normal_instr(ctx, instr);
2342*61046927SAndroid Build Coastguard Worker 
2343*61046927SAndroid Build Coastguard Worker       if (RA_DEBUG)
2344*61046927SAndroid Build Coastguard Worker          ra_ctx_dump(ctx);
2345*61046927SAndroid Build Coastguard Worker    }
2346*61046927SAndroid Build Coastguard Worker 
2347*61046927SAndroid Build Coastguard Worker    insert_live_out_moves(ctx);
2348*61046927SAndroid Build Coastguard Worker 
2349*61046927SAndroid Build Coastguard Worker    BITSET_FOREACH_SET (name, ctx->live->live_out[block->index],
2350*61046927SAndroid Build Coastguard Worker                        ctx->live->definitions_count) {
2351*61046927SAndroid Build Coastguard Worker       struct ir3_register *reg = ctx->live->definitions[name];
2352*61046927SAndroid Build Coastguard Worker       handle_live_out(ctx, reg);
2353*61046927SAndroid Build Coastguard Worker    }
2354*61046927SAndroid Build Coastguard Worker 
2355*61046927SAndroid Build Coastguard Worker    ctx->blocks[block->index].visited = true;
2356*61046927SAndroid Build Coastguard Worker }
2357*61046927SAndroid Build Coastguard Worker 
2358*61046927SAndroid Build Coastguard Worker static unsigned
calc_target_full_pressure(struct ir3_shader_variant * v,unsigned pressure)2359*61046927SAndroid Build Coastguard Worker calc_target_full_pressure(struct ir3_shader_variant *v, unsigned pressure)
2360*61046927SAndroid Build Coastguard Worker {
2361*61046927SAndroid Build Coastguard Worker    /* Registers are allocated in units of vec4, so switch from units of
2362*61046927SAndroid Build Coastguard Worker     * half-regs to vec4.
2363*61046927SAndroid Build Coastguard Worker     */
2364*61046927SAndroid Build Coastguard Worker    unsigned reg_count = DIV_ROUND_UP(pressure, 2 * 4);
2365*61046927SAndroid Build Coastguard Worker 
2366*61046927SAndroid Build Coastguard Worker    bool double_threadsize = ir3_should_double_threadsize(v, reg_count);
2367*61046927SAndroid Build Coastguard Worker 
2368*61046927SAndroid Build Coastguard Worker    unsigned target = reg_count;
2369*61046927SAndroid Build Coastguard Worker    unsigned reg_independent_max_waves =
2370*61046927SAndroid Build Coastguard Worker       ir3_get_reg_independent_max_waves(v, double_threadsize);
2371*61046927SAndroid Build Coastguard Worker    unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
2372*61046927SAndroid Build Coastguard Worker       v->compiler, reg_count, double_threadsize);
2373*61046927SAndroid Build Coastguard Worker    unsigned target_waves =
2374*61046927SAndroid Build Coastguard Worker       MIN2(reg_independent_max_waves, reg_dependent_max_waves);
2375*61046927SAndroid Build Coastguard Worker 
2376*61046927SAndroid Build Coastguard Worker    while (target <= RA_FULL_SIZE / (2 * 4) &&
2377*61046927SAndroid Build Coastguard Worker           ir3_should_double_threadsize(v, target) == double_threadsize &&
2378*61046927SAndroid Build Coastguard Worker           ir3_get_reg_dependent_max_waves(v->compiler, target,
2379*61046927SAndroid Build Coastguard Worker                                           double_threadsize) >= target_waves)
2380*61046927SAndroid Build Coastguard Worker       target++;
2381*61046927SAndroid Build Coastguard Worker 
2382*61046927SAndroid Build Coastguard Worker    return (target - 1) * 2 * 4;
2383*61046927SAndroid Build Coastguard Worker }
2384*61046927SAndroid Build Coastguard Worker 
2385*61046927SAndroid Build Coastguard Worker static void
add_pressure(struct ir3_pressure * pressure,struct ir3_register * reg,bool merged_regs)2386*61046927SAndroid Build Coastguard Worker add_pressure(struct ir3_pressure *pressure, struct ir3_register *reg,
2387*61046927SAndroid Build Coastguard Worker              bool merged_regs)
2388*61046927SAndroid Build Coastguard Worker {
2389*61046927SAndroid Build Coastguard Worker    unsigned size = reg_size(reg);
2390*61046927SAndroid Build Coastguard Worker    if (reg->flags & IR3_REG_HALF)
2391*61046927SAndroid Build Coastguard Worker       pressure->half += size;
2392*61046927SAndroid Build Coastguard Worker    if (!(reg->flags & IR3_REG_HALF) || merged_regs)
2393*61046927SAndroid Build Coastguard Worker       pressure->full += size;
2394*61046927SAndroid Build Coastguard Worker }
2395*61046927SAndroid Build Coastguard Worker 
2396*61046927SAndroid Build Coastguard Worker static void
dummy_interval_add(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)2397*61046927SAndroid Build Coastguard Worker dummy_interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
2398*61046927SAndroid Build Coastguard Worker {
2399*61046927SAndroid Build Coastguard Worker }
2400*61046927SAndroid Build Coastguard Worker 
2401*61046927SAndroid Build Coastguard Worker static void
dummy_interval_delete(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)2402*61046927SAndroid Build Coastguard Worker dummy_interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
2403*61046927SAndroid Build Coastguard Worker {
2404*61046927SAndroid Build Coastguard Worker }
2405*61046927SAndroid Build Coastguard Worker 
2406*61046927SAndroid Build Coastguard Worker static void
dummy_interval_readd(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * parent,struct ir3_reg_interval * child)2407*61046927SAndroid Build Coastguard Worker dummy_interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *parent,
2408*61046927SAndroid Build Coastguard Worker                      struct ir3_reg_interval *child)
2409*61046927SAndroid Build Coastguard Worker {
2410*61046927SAndroid Build Coastguard Worker }
2411*61046927SAndroid Build Coastguard Worker 
2412*61046927SAndroid Build Coastguard Worker /* Calculate the minimum possible limit on register pressure so that spilling
2413*61046927SAndroid Build Coastguard Worker  * still succeeds. Used to implement IR3_SHADER_DEBUG=spillall.
2414*61046927SAndroid Build Coastguard Worker  */
2415*61046927SAndroid Build Coastguard Worker 
2416*61046927SAndroid Build Coastguard Worker static void
calc_min_limit_pressure(struct ir3_shader_variant * v,struct ir3_liveness * live,struct ir3_pressure * limit)2417*61046927SAndroid Build Coastguard Worker calc_min_limit_pressure(struct ir3_shader_variant *v,
2418*61046927SAndroid Build Coastguard Worker                         struct ir3_liveness *live,
2419*61046927SAndroid Build Coastguard Worker                         struct ir3_pressure *limit)
2420*61046927SAndroid Build Coastguard Worker {
2421*61046927SAndroid Build Coastguard Worker    struct ir3_block *start = ir3_start_block(v->ir);
2422*61046927SAndroid Build Coastguard Worker    struct ir3_reg_ctx *ctx = ralloc(NULL, struct ir3_reg_ctx);
2423*61046927SAndroid Build Coastguard Worker    struct ir3_reg_interval *intervals =
2424*61046927SAndroid Build Coastguard Worker       rzalloc_array(ctx, struct ir3_reg_interval, live->definitions_count);
2425*61046927SAndroid Build Coastguard Worker 
2426*61046927SAndroid Build Coastguard Worker    ctx->interval_add = dummy_interval_add;
2427*61046927SAndroid Build Coastguard Worker    ctx->interval_delete = dummy_interval_delete;
2428*61046927SAndroid Build Coastguard Worker    ctx->interval_readd = dummy_interval_readd;
2429*61046927SAndroid Build Coastguard Worker 
2430*61046927SAndroid Build Coastguard Worker    limit->full = limit->half = 0;
2431*61046927SAndroid Build Coastguard Worker 
2432*61046927SAndroid Build Coastguard Worker    struct ir3_pressure cur_pressure = {0};
2433*61046927SAndroid Build Coastguard Worker    foreach_instr (input, &start->instr_list) {
2434*61046927SAndroid Build Coastguard Worker       if (input->opc != OPC_META_INPUT &&
2435*61046927SAndroid Build Coastguard Worker           input->opc != OPC_META_TEX_PREFETCH)
2436*61046927SAndroid Build Coastguard Worker          break;
2437*61046927SAndroid Build Coastguard Worker 
2438*61046927SAndroid Build Coastguard Worker       add_pressure(&cur_pressure, input->dsts[0], v->mergedregs);
2439*61046927SAndroid Build Coastguard Worker    }
2440*61046927SAndroid Build Coastguard Worker 
2441*61046927SAndroid Build Coastguard Worker    limit->full = MAX2(limit->full, cur_pressure.full);
2442*61046927SAndroid Build Coastguard Worker    limit->half = MAX2(limit->half, cur_pressure.half);
2443*61046927SAndroid Build Coastguard Worker 
2444*61046927SAndroid Build Coastguard Worker    foreach_instr (input, &start->instr_list) {
2445*61046927SAndroid Build Coastguard Worker       if (input->opc != OPC_META_INPUT &&
2446*61046927SAndroid Build Coastguard Worker           input->opc != OPC_META_TEX_PREFETCH)
2447*61046927SAndroid Build Coastguard Worker          break;
2448*61046927SAndroid Build Coastguard Worker 
2449*61046927SAndroid Build Coastguard Worker       /* pre-colored inputs may have holes, which increases the pressure. */
2450*61046927SAndroid Build Coastguard Worker       struct ir3_register *dst = input->dsts[0];
2451*61046927SAndroid Build Coastguard Worker       if (dst->num != INVALID_REG) {
2452*61046927SAndroid Build Coastguard Worker          unsigned physreg = ra_reg_get_physreg(dst) + reg_size(dst);
2453*61046927SAndroid Build Coastguard Worker          if (dst->flags & IR3_REG_HALF)
2454*61046927SAndroid Build Coastguard Worker             limit->half = MAX2(limit->half, physreg);
2455*61046927SAndroid Build Coastguard Worker          if (!(dst->flags & IR3_REG_HALF) || v->mergedregs)
2456*61046927SAndroid Build Coastguard Worker             limit->full = MAX2(limit->full, physreg);
2457*61046927SAndroid Build Coastguard Worker       }
2458*61046927SAndroid Build Coastguard Worker    }
2459*61046927SAndroid Build Coastguard Worker 
2460*61046927SAndroid Build Coastguard Worker    foreach_block (block, &v->ir->block_list) {
2461*61046927SAndroid Build Coastguard Worker       rb_tree_init(&ctx->intervals);
2462*61046927SAndroid Build Coastguard Worker 
2463*61046927SAndroid Build Coastguard Worker       unsigned name;
2464*61046927SAndroid Build Coastguard Worker       BITSET_FOREACH_SET (name, live->live_in[block->index],
2465*61046927SAndroid Build Coastguard Worker                           live->definitions_count) {
2466*61046927SAndroid Build Coastguard Worker          struct ir3_register *reg = live->definitions[name];
2467*61046927SAndroid Build Coastguard Worker          ir3_reg_interval_init(&intervals[reg->name], reg);
2468*61046927SAndroid Build Coastguard Worker          ir3_reg_interval_insert(ctx, &intervals[reg->name]);
2469*61046927SAndroid Build Coastguard Worker       }
2470*61046927SAndroid Build Coastguard Worker 
2471*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
2472*61046927SAndroid Build Coastguard Worker          ra_foreach_dst (dst, instr) {
2473*61046927SAndroid Build Coastguard Worker             ir3_reg_interval_init(&intervals[dst->name], dst);
2474*61046927SAndroid Build Coastguard Worker          }
2475*61046927SAndroid Build Coastguard Worker          /* phis and parallel copies can be deleted via spilling */
2476*61046927SAndroid Build Coastguard Worker 
2477*61046927SAndroid Build Coastguard Worker          if (instr->opc == OPC_META_PHI) {
2478*61046927SAndroid Build Coastguard Worker             ir3_reg_interval_insert(ctx, &intervals[instr->dsts[0]->name]);
2479*61046927SAndroid Build Coastguard Worker             continue;
2480*61046927SAndroid Build Coastguard Worker          }
2481*61046927SAndroid Build Coastguard Worker 
2482*61046927SAndroid Build Coastguard Worker          if (instr->opc == OPC_META_PARALLEL_COPY)
2483*61046927SAndroid Build Coastguard Worker             continue;
2484*61046927SAndroid Build Coastguard Worker 
2485*61046927SAndroid Build Coastguard Worker          cur_pressure = (struct ir3_pressure) {0};
2486*61046927SAndroid Build Coastguard Worker 
2487*61046927SAndroid Build Coastguard Worker          ra_foreach_dst (dst, instr) {
2488*61046927SAndroid Build Coastguard Worker             if ((dst->tied && !(dst->tied->flags & IR3_REG_KILL)) ||
2489*61046927SAndroid Build Coastguard Worker                 (dst->flags & IR3_REG_EARLY_CLOBBER))
2490*61046927SAndroid Build Coastguard Worker                add_pressure(&cur_pressure, dst, v->mergedregs);
2491*61046927SAndroid Build Coastguard Worker          }
2492*61046927SAndroid Build Coastguard Worker 
2493*61046927SAndroid Build Coastguard Worker          ra_foreach_src_rev (src, instr) {
2494*61046927SAndroid Build Coastguard Worker             /* We currently don't support spilling the parent of a source when
2495*61046927SAndroid Build Coastguard Worker              * making space for sources, so we have to keep track of the
2496*61046927SAndroid Build Coastguard Worker              * intervals and figure out the root of the tree to figure out how
2497*61046927SAndroid Build Coastguard Worker              * much space we need.
2498*61046927SAndroid Build Coastguard Worker              *
2499*61046927SAndroid Build Coastguard Worker              * TODO: We should probably support this in the spiller.
2500*61046927SAndroid Build Coastguard Worker              */
2501*61046927SAndroid Build Coastguard Worker             struct ir3_reg_interval *interval = &intervals[src->def->name];
2502*61046927SAndroid Build Coastguard Worker             while (interval->parent)
2503*61046927SAndroid Build Coastguard Worker                interval = interval->parent;
2504*61046927SAndroid Build Coastguard Worker             add_pressure(&cur_pressure, interval->reg, v->mergedregs);
2505*61046927SAndroid Build Coastguard Worker 
2506*61046927SAndroid Build Coastguard Worker             if (src->flags & IR3_REG_FIRST_KILL)
2507*61046927SAndroid Build Coastguard Worker                ir3_reg_interval_remove(ctx, &intervals[src->def->name]);
2508*61046927SAndroid Build Coastguard Worker          }
2509*61046927SAndroid Build Coastguard Worker 
2510*61046927SAndroid Build Coastguard Worker          limit->full = MAX2(limit->full, cur_pressure.full);
2511*61046927SAndroid Build Coastguard Worker          limit->half = MAX2(limit->half, cur_pressure.half);
2512*61046927SAndroid Build Coastguard Worker 
2513*61046927SAndroid Build Coastguard Worker          cur_pressure = (struct ir3_pressure) {0};
2514*61046927SAndroid Build Coastguard Worker 
2515*61046927SAndroid Build Coastguard Worker          ra_foreach_dst (dst, instr) {
2516*61046927SAndroid Build Coastguard Worker             ir3_reg_interval_init(&intervals[dst->name], dst);
2517*61046927SAndroid Build Coastguard Worker             ir3_reg_interval_insert(ctx, &intervals[dst->name]);
2518*61046927SAndroid Build Coastguard Worker             add_pressure(&cur_pressure, dst, v->mergedregs);
2519*61046927SAndroid Build Coastguard Worker          }
2520*61046927SAndroid Build Coastguard Worker 
2521*61046927SAndroid Build Coastguard Worker          limit->full = MAX2(limit->full, cur_pressure.full);
2522*61046927SAndroid Build Coastguard Worker          limit->half = MAX2(limit->half, cur_pressure.half);
2523*61046927SAndroid Build Coastguard Worker       }
2524*61046927SAndroid Build Coastguard Worker    }
2525*61046927SAndroid Build Coastguard Worker 
2526*61046927SAndroid Build Coastguard Worker    /* Account for the base register, which needs to be available everywhere. */
2527*61046927SAndroid Build Coastguard Worker    limit->full += 2;
2528*61046927SAndroid Build Coastguard Worker 
2529*61046927SAndroid Build Coastguard Worker    ralloc_free(ctx);
2530*61046927SAndroid Build Coastguard Worker }
2531*61046927SAndroid Build Coastguard Worker 
2532*61046927SAndroid Build Coastguard Worker /*
2533*61046927SAndroid Build Coastguard Worker  * If barriers are used, it must be possible for all waves in the workgroup
2534*61046927SAndroid Build Coastguard Worker  * to execute concurrently. Thus we may have to reduce the registers limit.
2535*61046927SAndroid Build Coastguard Worker  */
2536*61046927SAndroid Build Coastguard Worker static void
calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant * v,struct ir3_pressure * limit_pressure)2537*61046927SAndroid Build Coastguard Worker calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v,
2538*61046927SAndroid Build Coastguard Worker                                         struct ir3_pressure *limit_pressure)
2539*61046927SAndroid Build Coastguard Worker {
2540*61046927SAndroid Build Coastguard Worker    const struct ir3_compiler *compiler = v->compiler;
2541*61046927SAndroid Build Coastguard Worker 
2542*61046927SAndroid Build Coastguard Worker    unsigned threads_per_wg;
2543*61046927SAndroid Build Coastguard Worker    if (v->local_size_variable) {
2544*61046927SAndroid Build Coastguard Worker       /* We have to expect the worst case. */
2545*61046927SAndroid Build Coastguard Worker       threads_per_wg = compiler->max_variable_workgroup_size;
2546*61046927SAndroid Build Coastguard Worker    } else {
2547*61046927SAndroid Build Coastguard Worker       threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2];
2548*61046927SAndroid Build Coastguard Worker    }
2549*61046927SAndroid Build Coastguard Worker 
2550*61046927SAndroid Build Coastguard Worker    /* The register file is grouped into reg_size_vec4 number of parts.
2551*61046927SAndroid Build Coastguard Worker     * Each part has enough registers to add a single vec4 register to
2552*61046927SAndroid Build Coastguard Worker     * each thread of a single-sized wave-pair. With double threadsize
2553*61046927SAndroid Build Coastguard Worker     * each wave-pair would consume two parts of the register file to get
2554*61046927SAndroid Build Coastguard Worker     * a single vec4 for a thread. The more active wave-pairs the less
2555*61046927SAndroid Build Coastguard Worker     * parts each could get.
2556*61046927SAndroid Build Coastguard Worker     */
2557*61046927SAndroid Build Coastguard Worker 
2558*61046927SAndroid Build Coastguard Worker    bool double_threadsize = ir3_should_double_threadsize(v, 0);
2559*61046927SAndroid Build Coastguard Worker    unsigned waves_per_wg = DIV_ROUND_UP(
2560*61046927SAndroid Build Coastguard Worker       threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) *
2561*61046927SAndroid Build Coastguard Worker                          compiler->wave_granularity);
2562*61046927SAndroid Build Coastguard Worker 
2563*61046927SAndroid Build Coastguard Worker    uint32_t vec4_regs_per_thread =
2564*61046927SAndroid Build Coastguard Worker       compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1));
2565*61046927SAndroid Build Coastguard Worker    assert(vec4_regs_per_thread > 0);
2566*61046927SAndroid Build Coastguard Worker 
2567*61046927SAndroid Build Coastguard Worker    uint32_t half_regs_per_thread = vec4_regs_per_thread * 4 * 2;
2568*61046927SAndroid Build Coastguard Worker 
2569*61046927SAndroid Build Coastguard Worker    if (limit_pressure->full > half_regs_per_thread) {
2570*61046927SAndroid Build Coastguard Worker       if (v->mergedregs) {
2571*61046927SAndroid Build Coastguard Worker          limit_pressure->full = half_regs_per_thread;
2572*61046927SAndroid Build Coastguard Worker       } else {
2573*61046927SAndroid Build Coastguard Worker          /* TODO: Handle !mergedregs case, probably we would have to do this
2574*61046927SAndroid Build Coastguard Worker           * after the first register pressure pass.
2575*61046927SAndroid Build Coastguard Worker           */
2576*61046927SAndroid Build Coastguard Worker       }
2577*61046927SAndroid Build Coastguard Worker    }
2578*61046927SAndroid Build Coastguard Worker }
2579*61046927SAndroid Build Coastguard Worker 
2580*61046927SAndroid Build Coastguard Worker int
ir3_ra(struct ir3_shader_variant * v)2581*61046927SAndroid Build Coastguard Worker ir3_ra(struct ir3_shader_variant *v)
2582*61046927SAndroid Build Coastguard Worker {
2583*61046927SAndroid Build Coastguard Worker    ir3_calc_dominance(v->ir);
2584*61046927SAndroid Build Coastguard Worker 
2585*61046927SAndroid Build Coastguard Worker    /* Predicate RA needs dominance. */
2586*61046927SAndroid Build Coastguard Worker    ir3_ra_predicates(v);
2587*61046927SAndroid Build Coastguard Worker 
2588*61046927SAndroid Build Coastguard Worker    ir3_create_parallel_copies(v->ir);
2589*61046927SAndroid Build Coastguard Worker 
2590*61046927SAndroid Build Coastguard Worker    struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx);
2591*61046927SAndroid Build Coastguard Worker 
2592*61046927SAndroid Build Coastguard Worker    ctx->merged_regs = v->mergedregs;
2593*61046927SAndroid Build Coastguard Worker    ctx->compiler = v->compiler;
2594*61046927SAndroid Build Coastguard Worker    ctx->stage = v->type;
2595*61046927SAndroid Build Coastguard Worker 
2596*61046927SAndroid Build Coastguard Worker    struct ir3_liveness *live = ir3_calc_liveness(ctx, v->ir);
2597*61046927SAndroid Build Coastguard Worker 
2598*61046927SAndroid Build Coastguard Worker    ir3_debug_print(v->ir, "AFTER: create_parallel_copies");
2599*61046927SAndroid Build Coastguard Worker 
2600*61046927SAndroid Build Coastguard Worker    ir3_index_instrs_for_merge_sets(v->ir);
2601*61046927SAndroid Build Coastguard Worker    ir3_merge_regs(live, v->ir);
2602*61046927SAndroid Build Coastguard Worker 
2603*61046927SAndroid Build Coastguard Worker    bool has_shared_vectors = false;
2604*61046927SAndroid Build Coastguard Worker    foreach_block (block, &v->ir->block_list) {
2605*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
2606*61046927SAndroid Build Coastguard Worker          ra_foreach_dst (dst, instr) {
2607*61046927SAndroid Build Coastguard Worker             if ((dst->flags & IR3_REG_SHARED) && reg_elems(dst) > 1) {
2608*61046927SAndroid Build Coastguard Worker                has_shared_vectors = true;
2609*61046927SAndroid Build Coastguard Worker                break;
2610*61046927SAndroid Build Coastguard Worker             }
2611*61046927SAndroid Build Coastguard Worker          }
2612*61046927SAndroid Build Coastguard Worker       }
2613*61046927SAndroid Build Coastguard Worker    }
2614*61046927SAndroid Build Coastguard Worker 
2615*61046927SAndroid Build Coastguard Worker    struct ir3_pressure max_pressure;
2616*61046927SAndroid Build Coastguard Worker    ir3_calc_pressure(v, live, &max_pressure);
2617*61046927SAndroid Build Coastguard Worker    d("max pressure:");
2618*61046927SAndroid Build Coastguard Worker    d("\tfull: %u", max_pressure.full);
2619*61046927SAndroid Build Coastguard Worker    d("\thalf: %u", max_pressure.half);
2620*61046927SAndroid Build Coastguard Worker    d("\tshared: %u", max_pressure.shared);
2621*61046927SAndroid Build Coastguard Worker 
2622*61046927SAndroid Build Coastguard Worker    struct ir3_pressure limit_pressure;
2623*61046927SAndroid Build Coastguard Worker    limit_pressure.full = RA_FULL_SIZE;
2624*61046927SAndroid Build Coastguard Worker    limit_pressure.half = RA_HALF_SIZE;
2625*61046927SAndroid Build Coastguard Worker    limit_pressure.shared = RA_SHARED_SIZE;
2626*61046927SAndroid Build Coastguard Worker    limit_pressure.shared_half = RA_SHARED_HALF_SIZE;
2627*61046927SAndroid Build Coastguard Worker 
2628*61046927SAndroid Build Coastguard Worker    if (gl_shader_stage_is_compute(v->type) && v->has_barrier) {
2629*61046927SAndroid Build Coastguard Worker       calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
2630*61046927SAndroid Build Coastguard Worker    }
2631*61046927SAndroid Build Coastguard Worker 
2632*61046927SAndroid Build Coastguard Worker    /* If the user forces a doubled threadsize, we may have to lower the limit
2633*61046927SAndroid Build Coastguard Worker     * because on some gens the register file is not big enough to hold a
2634*61046927SAndroid Build Coastguard Worker     * double-size wave with all 48 registers in use.
2635*61046927SAndroid Build Coastguard Worker     */
2636*61046927SAndroid Build Coastguard Worker    if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) {
2637*61046927SAndroid Build Coastguard Worker       limit_pressure.full =
2638*61046927SAndroid Build Coastguard Worker          MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
2639*61046927SAndroid Build Coastguard Worker    }
2640*61046927SAndroid Build Coastguard Worker 
2641*61046927SAndroid Build Coastguard Worker    /* If requested, lower the limit so that spilling happens more often. */
2642*61046927SAndroid Build Coastguard Worker    if (ir3_shader_debug & IR3_DBG_SPILLALL)
2643*61046927SAndroid Build Coastguard Worker       calc_min_limit_pressure(v, live, &limit_pressure);
2644*61046927SAndroid Build Coastguard Worker 
2645*61046927SAndroid Build Coastguard Worker    d("limit pressure:");
2646*61046927SAndroid Build Coastguard Worker    d("\tfull: %u", limit_pressure.full);
2647*61046927SAndroid Build Coastguard Worker    d("\thalf: %u", limit_pressure.half);
2648*61046927SAndroid Build Coastguard Worker    d("\tshared: %u", limit_pressure.shared);
2649*61046927SAndroid Build Coastguard Worker 
2650*61046927SAndroid Build Coastguard Worker    /* In the worst case, each half register could block one full register, so
2651*61046927SAndroid Build Coastguard Worker     * add shared_half in case of fragmentation. In addition, full registers can
2652*61046927SAndroid Build Coastguard Worker     * block half registers so we have to consider the total pressure against the
2653*61046927SAndroid Build Coastguard Worker     * half limit to prevent live range splitting when we run out of space for
2654*61046927SAndroid Build Coastguard Worker     * half registers in the bottom half.
2655*61046927SAndroid Build Coastguard Worker     */
2656*61046927SAndroid Build Coastguard Worker    if (max_pressure.shared + max_pressure.shared_half > limit_pressure.shared ||
2657*61046927SAndroid Build Coastguard Worker        (max_pressure.shared_half > 0 && max_pressure.shared > limit_pressure.shared_half) ||
2658*61046927SAndroid Build Coastguard Worker        has_shared_vectors) {
2659*61046927SAndroid Build Coastguard Worker       ir3_ra_shared(v, &live);
2660*61046927SAndroid Build Coastguard Worker       ir3_calc_pressure(v, live, &max_pressure);
2661*61046927SAndroid Build Coastguard Worker 
2662*61046927SAndroid Build Coastguard Worker       ir3_debug_print(v->ir, "AFTER: shared register allocation");
2663*61046927SAndroid Build Coastguard Worker    }
2664*61046927SAndroid Build Coastguard Worker 
2665*61046927SAndroid Build Coastguard Worker    bool spilled = false;
2666*61046927SAndroid Build Coastguard Worker    if (max_pressure.full > limit_pressure.full ||
2667*61046927SAndroid Build Coastguard Worker        max_pressure.half > limit_pressure.half) {
2668*61046927SAndroid Build Coastguard Worker       if (!v->compiler->has_pvtmem) {
2669*61046927SAndroid Build Coastguard Worker          d("max pressure exceeded!");
2670*61046927SAndroid Build Coastguard Worker          goto fail;
2671*61046927SAndroid Build Coastguard Worker       }
2672*61046927SAndroid Build Coastguard Worker       d("max pressure exceeded, spilling!");
2673*61046927SAndroid Build Coastguard Worker       IR3_PASS(v->ir, ir3_spill, v, &live, &limit_pressure);
2674*61046927SAndroid Build Coastguard Worker       ir3_calc_pressure(v, live, &max_pressure);
2675*61046927SAndroid Build Coastguard Worker 
2676*61046927SAndroid Build Coastguard Worker       d("max pressure after spilling:");
2677*61046927SAndroid Build Coastguard Worker       d("\tfull: %u", max_pressure.full);
2678*61046927SAndroid Build Coastguard Worker       d("\thalf: %u", max_pressure.half);
2679*61046927SAndroid Build Coastguard Worker       d("\tshared: %u", max_pressure.shared);
2680*61046927SAndroid Build Coastguard Worker 
2681*61046927SAndroid Build Coastguard Worker       assert(max_pressure.full <= limit_pressure.full &&
2682*61046927SAndroid Build Coastguard Worker              max_pressure.half <= limit_pressure.half);
2683*61046927SAndroid Build Coastguard Worker       spilled = true;
2684*61046927SAndroid Build Coastguard Worker    }
2685*61046927SAndroid Build Coastguard Worker 
2686*61046927SAndroid Build Coastguard Worker    ctx->live = live;
2687*61046927SAndroid Build Coastguard Worker    ctx->intervals =
2688*61046927SAndroid Build Coastguard Worker       rzalloc_array(ctx, struct ra_interval, live->definitions_count);
2689*61046927SAndroid Build Coastguard Worker    ctx->blocks = rzalloc_array(ctx, struct ra_block_state, live->block_count);
2690*61046927SAndroid Build Coastguard Worker 
2691*61046927SAndroid Build Coastguard Worker    ctx->full.size = calc_target_full_pressure(v, max_pressure.full);
2692*61046927SAndroid Build Coastguard Worker    d("full size: %u", ctx->full.size);
2693*61046927SAndroid Build Coastguard Worker 
2694*61046927SAndroid Build Coastguard Worker    if (!v->mergedregs)
2695*61046927SAndroid Build Coastguard Worker       ctx->half.size = RA_HALF_SIZE;
2696*61046927SAndroid Build Coastguard Worker 
2697*61046927SAndroid Build Coastguard Worker    ctx->shared.size = RA_SHARED_SIZE;
2698*61046927SAndroid Build Coastguard Worker 
2699*61046927SAndroid Build Coastguard Worker    ctx->full.start = ctx->half.start = ctx->shared.start = 0;
2700*61046927SAndroid Build Coastguard Worker 
2701*61046927SAndroid Build Coastguard Worker    foreach_block (block, &v->ir->block_list)
2702*61046927SAndroid Build Coastguard Worker       handle_block(ctx, block);
2703*61046927SAndroid Build Coastguard Worker 
2704*61046927SAndroid Build Coastguard Worker    ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count, false);
2705*61046927SAndroid Build Coastguard Worker 
2706*61046927SAndroid Build Coastguard Worker    /* Strip array-ness and SSA-ness at the end, because various helpers still
2707*61046927SAndroid Build Coastguard Worker     * need to work even on definitions that have already been assigned. For
2708*61046927SAndroid Build Coastguard Worker     * example, we need to preserve array-ness so that array live-ins have the
2709*61046927SAndroid Build Coastguard Worker     * right size.
2710*61046927SAndroid Build Coastguard Worker     */
2711*61046927SAndroid Build Coastguard Worker    foreach_block (block, &v->ir->block_list) {
2712*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
2713*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < instr->dsts_count; i++) {
2714*61046927SAndroid Build Coastguard Worker             instr->dsts[i]->flags &= ~IR3_REG_SSA;
2715*61046927SAndroid Build Coastguard Worker 
2716*61046927SAndroid Build Coastguard Worker             /* Parallel copies of array registers copy the whole register, and
2717*61046927SAndroid Build Coastguard Worker              * we need some way to let the parallel copy code know that this was
2718*61046927SAndroid Build Coastguard Worker              * an array whose size is determined by reg->size. So keep the array
2719*61046927SAndroid Build Coastguard Worker              * flag on those. spill/reload also need to work on the entire
2720*61046927SAndroid Build Coastguard Worker              * array.
2721*61046927SAndroid Build Coastguard Worker              */
2722*61046927SAndroid Build Coastguard Worker             if (!is_meta(instr) && instr->opc != OPC_RELOAD_MACRO)
2723*61046927SAndroid Build Coastguard Worker                instr->dsts[i]->flags &= ~IR3_REG_ARRAY;
2724*61046927SAndroid Build Coastguard Worker          }
2725*61046927SAndroid Build Coastguard Worker 
2726*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < instr->srcs_count; i++) {
2727*61046927SAndroid Build Coastguard Worker             instr->srcs[i]->flags &= ~IR3_REG_SSA;
2728*61046927SAndroid Build Coastguard Worker 
2729*61046927SAndroid Build Coastguard Worker             if (!is_meta(instr) && instr->opc != OPC_SPILL_MACRO)
2730*61046927SAndroid Build Coastguard Worker                instr->srcs[i]->flags &= ~IR3_REG_ARRAY;
2731*61046927SAndroid Build Coastguard Worker          }
2732*61046927SAndroid Build Coastguard Worker       }
2733*61046927SAndroid Build Coastguard Worker    }
2734*61046927SAndroid Build Coastguard Worker 
2735*61046927SAndroid Build Coastguard Worker    ir3_debug_print(v->ir, "AFTER: register allocation");
2736*61046927SAndroid Build Coastguard Worker 
2737*61046927SAndroid Build Coastguard Worker    if (spilled) {
2738*61046927SAndroid Build Coastguard Worker       IR3_PASS(v->ir, ir3_lower_spill);
2739*61046927SAndroid Build Coastguard Worker    }
2740*61046927SAndroid Build Coastguard Worker 
2741*61046927SAndroid Build Coastguard Worker    ir3_lower_copies(v);
2742*61046927SAndroid Build Coastguard Worker 
2743*61046927SAndroid Build Coastguard Worker    ir3_debug_print(v->ir, "AFTER: ir3_lower_copies");
2744*61046927SAndroid Build Coastguard Worker 
2745*61046927SAndroid Build Coastguard Worker    ralloc_free(ctx);
2746*61046927SAndroid Build Coastguard Worker 
2747*61046927SAndroid Build Coastguard Worker    return 0;
2748*61046927SAndroid Build Coastguard Worker fail:
2749*61046927SAndroid Build Coastguard Worker    ralloc_free(ctx);
2750*61046927SAndroid Build Coastguard Worker    return -1;
2751*61046927SAndroid Build Coastguard Worker }
2752