1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2021 Valve Corporation
3*61046927SAndroid Build Coastguard Worker * Copyright © 2014 Rob Clark <[email protected]>
4*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
5*61046927SAndroid Build Coastguard Worker */
6*61046927SAndroid Build Coastguard Worker
7*61046927SAndroid Build Coastguard Worker #include "ir3_ra.h"
8*61046927SAndroid Build Coastguard Worker #include "util/rb_tree.h"
9*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
10*61046927SAndroid Build Coastguard Worker #include "ir3_shader.h"
11*61046927SAndroid Build Coastguard Worker
12*61046927SAndroid Build Coastguard Worker /* This file implements an SSA-based register allocator. Unlike other
13*61046927SAndroid Build Coastguard Worker * SSA-based allocators, it handles vector split/collect "smartly," meaning
14*61046927SAndroid Build Coastguard Worker * that multiple values may share the same register interval. From the
15*61046927SAndroid Build Coastguard Worker * perspective of the allocator itself, only the top-level intervals matter,
16*61046927SAndroid Build Coastguard Worker * and the allocator is only concerned with allocating top-level intervals,
17*61046927SAndroid Build Coastguard Worker * which may mean moving other top-level intervals around. Other intervals,
18*61046927SAndroid Build Coastguard Worker * like the destination of a split instruction or the source of a collect
19*61046927SAndroid Build Coastguard Worker * instruction, are "locked" to their parent interval. The details of this are
20*61046927SAndroid Build Coastguard Worker * mostly handled by ir3_merge_regs and ir3_reg_ctx.
21*61046927SAndroid Build Coastguard Worker *
22*61046927SAndroid Build Coastguard Worker * We currently don't do any backtracking, but we do use the merge sets as a
23*61046927SAndroid Build Coastguard Worker * form of affinity to try to avoid moves from phis/splits/collects. Each
24*61046927SAndroid Build Coastguard Worker * merge set is what a more "classic" graph-coloring or live-range based
25*61046927SAndroid Build Coastguard Worker * allocator would consider a single register, but here we use it as merely a
26*61046927SAndroid Build Coastguard Worker * hint, except when multiple overlapping values are live at the same time.
27*61046927SAndroid Build Coastguard Worker * Each merge set has a "preferred" register, and we try to honor that when
28*61046927SAndroid Build Coastguard Worker * allocating values in the merge set.
29*61046927SAndroid Build Coastguard Worker */
30*61046927SAndroid Build Coastguard Worker
31*61046927SAndroid Build Coastguard Worker /* ir3_reg_ctx implementation. */
32*61046927SAndroid Build Coastguard Worker
33*61046927SAndroid Build Coastguard Worker static int
ir3_reg_interval_cmp(const struct rb_node * node,const void * data)34*61046927SAndroid Build Coastguard Worker ir3_reg_interval_cmp(const struct rb_node *node, const void *data)
35*61046927SAndroid Build Coastguard Worker {
36*61046927SAndroid Build Coastguard Worker unsigned reg = *(const unsigned *)data;
37*61046927SAndroid Build Coastguard Worker const struct ir3_reg_interval *interval =
38*61046927SAndroid Build Coastguard Worker ir3_rb_node_to_interval_const(node);
39*61046927SAndroid Build Coastguard Worker if (interval->reg->interval_start > reg)
40*61046927SAndroid Build Coastguard Worker return -1;
41*61046927SAndroid Build Coastguard Worker else if (interval->reg->interval_end <= reg)
42*61046927SAndroid Build Coastguard Worker return 1;
43*61046927SAndroid Build Coastguard Worker else
44*61046927SAndroid Build Coastguard Worker return 0;
45*61046927SAndroid Build Coastguard Worker }
46*61046927SAndroid Build Coastguard Worker
47*61046927SAndroid Build Coastguard Worker static struct ir3_reg_interval *
ir3_reg_interval_search(struct rb_tree * tree,unsigned offset)48*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search(struct rb_tree *tree, unsigned offset)
49*61046927SAndroid Build Coastguard Worker {
50*61046927SAndroid Build Coastguard Worker struct rb_node *node = rb_tree_search(tree, &offset, ir3_reg_interval_cmp);
51*61046927SAndroid Build Coastguard Worker return node ? ir3_rb_node_to_interval(node) : NULL;
52*61046927SAndroid Build Coastguard Worker }
53*61046927SAndroid Build Coastguard Worker
54*61046927SAndroid Build Coastguard Worker static struct ir3_reg_interval *
ir3_reg_interval_search_sloppy(struct rb_tree * tree,unsigned offset)55*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search_sloppy(struct rb_tree *tree, unsigned offset)
56*61046927SAndroid Build Coastguard Worker {
57*61046927SAndroid Build Coastguard Worker struct rb_node *node =
58*61046927SAndroid Build Coastguard Worker rb_tree_search_sloppy(tree, &offset, ir3_reg_interval_cmp);
59*61046927SAndroid Build Coastguard Worker return node ? ir3_rb_node_to_interval(node) : NULL;
60*61046927SAndroid Build Coastguard Worker }
61*61046927SAndroid Build Coastguard Worker
62*61046927SAndroid Build Coastguard Worker /* Get the interval covering the reg, or the closest to the right if it
63*61046927SAndroid Build Coastguard Worker * doesn't exist.
64*61046927SAndroid Build Coastguard Worker */
65*61046927SAndroid Build Coastguard Worker static struct ir3_reg_interval *
ir3_reg_interval_search_right(struct rb_tree * tree,unsigned offset)66*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search_right(struct rb_tree *tree, unsigned offset)
67*61046927SAndroid Build Coastguard Worker {
68*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval =
69*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search_sloppy(tree, offset);
70*61046927SAndroid Build Coastguard Worker if (!interval) {
71*61046927SAndroid Build Coastguard Worker return NULL;
72*61046927SAndroid Build Coastguard Worker } else if (interval->reg->interval_end > offset) {
73*61046927SAndroid Build Coastguard Worker return interval;
74*61046927SAndroid Build Coastguard Worker } else {
75*61046927SAndroid Build Coastguard Worker /* There is no interval covering reg, and ra_file_search_sloppy()
76*61046927SAndroid Build Coastguard Worker * returned the closest range to the left, so the next interval to the
77*61046927SAndroid Build Coastguard Worker * right should be the closest to the right.
78*61046927SAndroid Build Coastguard Worker */
79*61046927SAndroid Build Coastguard Worker return ir3_reg_interval_next_or_null(interval);
80*61046927SAndroid Build Coastguard Worker }
81*61046927SAndroid Build Coastguard Worker }
82*61046927SAndroid Build Coastguard Worker
83*61046927SAndroid Build Coastguard Worker static int
ir3_reg_interval_insert_cmp(const struct rb_node * _a,const struct rb_node * _b)84*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b)
85*61046927SAndroid Build Coastguard Worker {
86*61046927SAndroid Build Coastguard Worker const struct ir3_reg_interval *a = ir3_rb_node_to_interval_const(_a);
87*61046927SAndroid Build Coastguard Worker const struct ir3_reg_interval *b = ir3_rb_node_to_interval_const(_b);
88*61046927SAndroid Build Coastguard Worker return b->reg->interval_start - a->reg->interval_start;
89*61046927SAndroid Build Coastguard Worker }
90*61046927SAndroid Build Coastguard Worker
91*61046927SAndroid Build Coastguard Worker static void
interval_insert(struct ir3_reg_ctx * ctx,struct rb_tree * tree,struct ir3_reg_interval * interval)92*61046927SAndroid Build Coastguard Worker interval_insert(struct ir3_reg_ctx *ctx, struct rb_tree *tree,
93*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval)
94*61046927SAndroid Build Coastguard Worker {
95*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *right =
96*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search_right(tree, interval->reg->interval_start);
97*61046927SAndroid Build Coastguard Worker if (right && right->reg->interval_start < interval->reg->interval_end) {
98*61046927SAndroid Build Coastguard Worker /* We disallow trees where different members have different half-ness.
99*61046927SAndroid Build Coastguard Worker * This means that we can't treat bitcasts as copies like normal
100*61046927SAndroid Build Coastguard Worker * split/collect, so something like this would require an extra copy
101*61046927SAndroid Build Coastguard Worker * in mergedregs mode, and count as 4 half-units of register pressure
102*61046927SAndroid Build Coastguard Worker * instead of 2:
103*61046927SAndroid Build Coastguard Worker *
104*61046927SAndroid Build Coastguard Worker * f16vec2 foo = unpackFloat2x16(bar)
105*61046927SAndroid Build Coastguard Worker * ... = foo.x
106*61046927SAndroid Build Coastguard Worker * ... = bar
107*61046927SAndroid Build Coastguard Worker *
108*61046927SAndroid Build Coastguard Worker * However, relaxing this rule would open a huge can of worms. What
109*61046927SAndroid Build Coastguard Worker * happens when there's a vector of 16 things, and the fifth element
110*61046927SAndroid Build Coastguard Worker * has been bitcasted as a half-reg? Would that element alone have to
111*61046927SAndroid Build Coastguard Worker * be small enough to be used as a half-reg source? Let's keep that
112*61046927SAndroid Build Coastguard Worker * can of worms firmly shut for now.
113*61046927SAndroid Build Coastguard Worker */
114*61046927SAndroid Build Coastguard Worker assert((interval->reg->flags & IR3_REG_HALF) ==
115*61046927SAndroid Build Coastguard Worker (right->reg->flags & IR3_REG_HALF));
116*61046927SAndroid Build Coastguard Worker
117*61046927SAndroid Build Coastguard Worker if (right->reg->interval_end <= interval->reg->interval_end &&
118*61046927SAndroid Build Coastguard Worker right->reg->interval_start >= interval->reg->interval_start) {
119*61046927SAndroid Build Coastguard Worker /* Check if we're inserting something that's already inserted */
120*61046927SAndroid Build Coastguard Worker assert(interval != right);
121*61046927SAndroid Build Coastguard Worker
122*61046927SAndroid Build Coastguard Worker /* "right" is contained in "interval" and must become a child of
123*61046927SAndroid Build Coastguard Worker * it. There may be further children too.
124*61046927SAndroid Build Coastguard Worker */
125*61046927SAndroid Build Coastguard Worker for (struct ir3_reg_interval *next = ir3_reg_interval_next(right);
126*61046927SAndroid Build Coastguard Worker right && right->reg->interval_start < interval->reg->interval_end;
127*61046927SAndroid Build Coastguard Worker right = next, next = ir3_reg_interval_next_or_null(next)) {
128*61046927SAndroid Build Coastguard Worker /* "right" must be contained in "interval." */
129*61046927SAndroid Build Coastguard Worker assert(right->reg->interval_end <= interval->reg->interval_end);
130*61046927SAndroid Build Coastguard Worker assert((interval->reg->flags & IR3_REG_HALF) ==
131*61046927SAndroid Build Coastguard Worker (right->reg->flags & IR3_REG_HALF));
132*61046927SAndroid Build Coastguard Worker if (!right->parent)
133*61046927SAndroid Build Coastguard Worker ctx->interval_delete(ctx, right);
134*61046927SAndroid Build Coastguard Worker right->parent = interval;
135*61046927SAndroid Build Coastguard Worker rb_tree_remove(tree, &right->node);
136*61046927SAndroid Build Coastguard Worker rb_tree_insert(&interval->children, &right->node,
137*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert_cmp);
138*61046927SAndroid Build Coastguard Worker }
139*61046927SAndroid Build Coastguard Worker } else {
140*61046927SAndroid Build Coastguard Worker /* "right" must contain "interval," since intervals must form a
141*61046927SAndroid Build Coastguard Worker * tree.
142*61046927SAndroid Build Coastguard Worker */
143*61046927SAndroid Build Coastguard Worker assert(right->reg->interval_start <= interval->reg->interval_start);
144*61046927SAndroid Build Coastguard Worker interval->parent = right;
145*61046927SAndroid Build Coastguard Worker interval_insert(ctx, &right->children, interval);
146*61046927SAndroid Build Coastguard Worker return;
147*61046927SAndroid Build Coastguard Worker }
148*61046927SAndroid Build Coastguard Worker }
149*61046927SAndroid Build Coastguard Worker
150*61046927SAndroid Build Coastguard Worker if (!interval->parent)
151*61046927SAndroid Build Coastguard Worker ctx->interval_add(ctx, interval);
152*61046927SAndroid Build Coastguard Worker rb_tree_insert(tree, &interval->node, ir3_reg_interval_insert_cmp);
153*61046927SAndroid Build Coastguard Worker interval->inserted = true;
154*61046927SAndroid Build Coastguard Worker }
155*61046927SAndroid Build Coastguard Worker
156*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_insert(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)157*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
158*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval)
159*61046927SAndroid Build Coastguard Worker {
160*61046927SAndroid Build Coastguard Worker rb_tree_init(&interval->children);
161*61046927SAndroid Build Coastguard Worker interval->parent = NULL;
162*61046927SAndroid Build Coastguard Worker interval_insert(ctx, &ctx->intervals, interval);
163*61046927SAndroid Build Coastguard Worker }
164*61046927SAndroid Build Coastguard Worker
165*61046927SAndroid Build Coastguard Worker /* Call after ir3_reg_interval_remove_temp() to reinsert the interval */
166*61046927SAndroid Build Coastguard Worker static void
ir3_reg_interval_reinsert(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)167*61046927SAndroid Build Coastguard Worker ir3_reg_interval_reinsert(struct ir3_reg_ctx *ctx,
168*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval)
169*61046927SAndroid Build Coastguard Worker {
170*61046927SAndroid Build Coastguard Worker interval->parent = NULL;
171*61046927SAndroid Build Coastguard Worker interval_insert(ctx, &ctx->intervals, interval);
172*61046927SAndroid Build Coastguard Worker }
173*61046927SAndroid Build Coastguard Worker
174*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_remove(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)175*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
176*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval)
177*61046927SAndroid Build Coastguard Worker {
178*61046927SAndroid Build Coastguard Worker assert(interval->inserted);
179*61046927SAndroid Build Coastguard Worker
180*61046927SAndroid Build Coastguard Worker if (interval->parent) {
181*61046927SAndroid Build Coastguard Worker rb_tree_remove(&interval->parent->children, &interval->node);
182*61046927SAndroid Build Coastguard Worker } else {
183*61046927SAndroid Build Coastguard Worker ctx->interval_delete(ctx, interval);
184*61046927SAndroid Build Coastguard Worker rb_tree_remove(&ctx->intervals, &interval->node);
185*61046927SAndroid Build Coastguard Worker }
186*61046927SAndroid Build Coastguard Worker
187*61046927SAndroid Build Coastguard Worker rb_tree_foreach_safe (struct ir3_reg_interval, child, &interval->children,
188*61046927SAndroid Build Coastguard Worker node) {
189*61046927SAndroid Build Coastguard Worker rb_tree_remove(&interval->children, &child->node);
190*61046927SAndroid Build Coastguard Worker child->parent = interval->parent;
191*61046927SAndroid Build Coastguard Worker
192*61046927SAndroid Build Coastguard Worker if (interval->parent) {
193*61046927SAndroid Build Coastguard Worker rb_tree_insert(&child->parent->children, &child->node,
194*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert_cmp);
195*61046927SAndroid Build Coastguard Worker } else {
196*61046927SAndroid Build Coastguard Worker ctx->interval_readd(ctx, interval, child);
197*61046927SAndroid Build Coastguard Worker rb_tree_insert(&ctx->intervals, &child->node,
198*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert_cmp);
199*61046927SAndroid Build Coastguard Worker }
200*61046927SAndroid Build Coastguard Worker }
201*61046927SAndroid Build Coastguard Worker
202*61046927SAndroid Build Coastguard Worker interval->inserted = false;
203*61046927SAndroid Build Coastguard Worker }
204*61046927SAndroid Build Coastguard Worker
205*61046927SAndroid Build Coastguard Worker static void
_mark_free(struct ir3_reg_interval * interval)206*61046927SAndroid Build Coastguard Worker _mark_free(struct ir3_reg_interval *interval)
207*61046927SAndroid Build Coastguard Worker {
208*61046927SAndroid Build Coastguard Worker interval->inserted = false;
209*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) {
210*61046927SAndroid Build Coastguard Worker _mark_free(child);
211*61046927SAndroid Build Coastguard Worker }
212*61046927SAndroid Build Coastguard Worker }
213*61046927SAndroid Build Coastguard Worker
214*61046927SAndroid Build Coastguard Worker /* Remove an interval and all its children from the tree. */
215*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_remove_all(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)216*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
217*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval)
218*61046927SAndroid Build Coastguard Worker {
219*61046927SAndroid Build Coastguard Worker assert(!interval->parent);
220*61046927SAndroid Build Coastguard Worker
221*61046927SAndroid Build Coastguard Worker ctx->interval_delete(ctx, interval);
222*61046927SAndroid Build Coastguard Worker rb_tree_remove(&ctx->intervals, &interval->node);
223*61046927SAndroid Build Coastguard Worker _mark_free(interval);
224*61046927SAndroid Build Coastguard Worker }
225*61046927SAndroid Build Coastguard Worker
226*61046927SAndroid Build Coastguard Worker /* Used when popping an interval to be shuffled around. Don't disturb children
227*61046927SAndroid Build Coastguard Worker * so that it can be later reinserted.
228*61046927SAndroid Build Coastguard Worker */
229*61046927SAndroid Build Coastguard Worker static void
ir3_reg_interval_remove_temp(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)230*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove_temp(struct ir3_reg_ctx *ctx,
231*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval)
232*61046927SAndroid Build Coastguard Worker {
233*61046927SAndroid Build Coastguard Worker assert(!interval->parent);
234*61046927SAndroid Build Coastguard Worker
235*61046927SAndroid Build Coastguard Worker ctx->interval_delete(ctx, interval);
236*61046927SAndroid Build Coastguard Worker rb_tree_remove(&ctx->intervals, &interval->node);
237*61046927SAndroid Build Coastguard Worker }
238*61046927SAndroid Build Coastguard Worker
239*61046927SAndroid Build Coastguard Worker static void
interval_dump(struct log_stream * stream,struct ir3_reg_interval * interval,unsigned indent)240*61046927SAndroid Build Coastguard Worker interval_dump(struct log_stream *stream, struct ir3_reg_interval *interval,
241*61046927SAndroid Build Coastguard Worker unsigned indent)
242*61046927SAndroid Build Coastguard Worker {
243*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < indent; i++)
244*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "\t");
245*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "reg %u start %u\n", interval->reg->name,
246*61046927SAndroid Build Coastguard Worker interval->reg->interval_start);
247*61046927SAndroid Build Coastguard Worker
248*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) {
249*61046927SAndroid Build Coastguard Worker interval_dump(stream, child, indent + 1);
250*61046927SAndroid Build Coastguard Worker }
251*61046927SAndroid Build Coastguard Worker
252*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < indent; i++)
253*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "\t");
254*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "reg %u end %u\n", interval->reg->name,
255*61046927SAndroid Build Coastguard Worker interval->reg->interval_end);
256*61046927SAndroid Build Coastguard Worker }
257*61046927SAndroid Build Coastguard Worker
258*61046927SAndroid Build Coastguard Worker void
ir3_reg_interval_dump(struct log_stream * stream,struct ir3_reg_interval * interval)259*61046927SAndroid Build Coastguard Worker ir3_reg_interval_dump(struct log_stream *stream, struct ir3_reg_interval *interval)
260*61046927SAndroid Build Coastguard Worker {
261*61046927SAndroid Build Coastguard Worker interval_dump(stream, interval, 0);
262*61046927SAndroid Build Coastguard Worker }
263*61046927SAndroid Build Coastguard Worker
264*61046927SAndroid Build Coastguard Worker /* These are the core datastructures used by the register allocator. First
265*61046927SAndroid Build Coastguard Worker * ra_interval and ra_file, which are used for intra-block tracking and use
266*61046927SAndroid Build Coastguard Worker * the ir3_reg_ctx infrastructure:
267*61046927SAndroid Build Coastguard Worker */
268*61046927SAndroid Build Coastguard Worker
269*61046927SAndroid Build Coastguard Worker struct ra_interval {
270*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval interval;
271*61046927SAndroid Build Coastguard Worker
272*61046927SAndroid Build Coastguard Worker struct rb_node physreg_node;
273*61046927SAndroid Build Coastguard Worker physreg_t physreg_start, physreg_end;
274*61046927SAndroid Build Coastguard Worker
275*61046927SAndroid Build Coastguard Worker /* True if this is a source of the current instruction which is entirely
276*61046927SAndroid Build Coastguard Worker * killed. This means we can allocate the dest over it, but we can't break
277*61046927SAndroid Build Coastguard Worker * it up.
278*61046927SAndroid Build Coastguard Worker */
279*61046927SAndroid Build Coastguard Worker bool is_killed;
280*61046927SAndroid Build Coastguard Worker
281*61046927SAndroid Build Coastguard Worker /* True if this interval cannot be moved from its position. This is only
282*61046927SAndroid Build Coastguard Worker * used for precolored inputs to ensure that other inputs don't get
283*61046927SAndroid Build Coastguard Worker * allocated on top of them.
284*61046927SAndroid Build Coastguard Worker */
285*61046927SAndroid Build Coastguard Worker bool frozen;
286*61046927SAndroid Build Coastguard Worker };
287*61046927SAndroid Build Coastguard Worker
288*61046927SAndroid Build Coastguard Worker struct ra_file {
289*61046927SAndroid Build Coastguard Worker struct ir3_reg_ctx reg_ctx;
290*61046927SAndroid Build Coastguard Worker
291*61046927SAndroid Build Coastguard Worker BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
292*61046927SAndroid Build Coastguard Worker BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE);
293*61046927SAndroid Build Coastguard Worker
294*61046927SAndroid Build Coastguard Worker struct rb_tree physreg_intervals;
295*61046927SAndroid Build Coastguard Worker
296*61046927SAndroid Build Coastguard Worker unsigned size;
297*61046927SAndroid Build Coastguard Worker unsigned start;
298*61046927SAndroid Build Coastguard Worker };
299*61046927SAndroid Build Coastguard Worker
300*61046927SAndroid Build Coastguard Worker /* State for inter-block tracking. When we split a live range to make space
301*61046927SAndroid Build Coastguard Worker * for a vector, we may need to insert fixup code when a block has multiple
302*61046927SAndroid Build Coastguard Worker * predecessors that have moved the same live value to different registers.
303*61046927SAndroid Build Coastguard Worker * This keeps track of state required to do that.
304*61046927SAndroid Build Coastguard Worker */
305*61046927SAndroid Build Coastguard Worker
306*61046927SAndroid Build Coastguard Worker struct ra_block_state {
307*61046927SAndroid Build Coastguard Worker /* Map of defining ir3_register -> physreg it was allocated to at the end
308*61046927SAndroid Build Coastguard Worker * of the block.
309*61046927SAndroid Build Coastguard Worker */
310*61046927SAndroid Build Coastguard Worker struct hash_table *renames;
311*61046927SAndroid Build Coastguard Worker
312*61046927SAndroid Build Coastguard Worker /* For loops, we need to process a block before all its predecessors have
313*61046927SAndroid Build Coastguard Worker * been processed. In particular, we need to pick registers for values
314*61046927SAndroid Build Coastguard Worker * without knowing if all the predecessors have been renamed. This keeps
315*61046927SAndroid Build Coastguard Worker * track of the registers we chose so that when we visit the back-edge we
316*61046927SAndroid Build Coastguard Worker * can move them appropriately. If all predecessors have been visited
317*61046927SAndroid Build Coastguard Worker * before this block is visited then we don't need to fill this out. This
318*61046927SAndroid Build Coastguard Worker * is a map from ir3_register -> physreg.
319*61046927SAndroid Build Coastguard Worker */
320*61046927SAndroid Build Coastguard Worker struct hash_table *entry_regs;
321*61046927SAndroid Build Coastguard Worker
322*61046927SAndroid Build Coastguard Worker /* True if the block has been visited and "renames" is complete.
323*61046927SAndroid Build Coastguard Worker */
324*61046927SAndroid Build Coastguard Worker bool visited;
325*61046927SAndroid Build Coastguard Worker };
326*61046927SAndroid Build Coastguard Worker
327*61046927SAndroid Build Coastguard Worker struct ra_parallel_copy {
328*61046927SAndroid Build Coastguard Worker struct ra_interval *interval;
329*61046927SAndroid Build Coastguard Worker physreg_t src;
330*61046927SAndroid Build Coastguard Worker };
331*61046927SAndroid Build Coastguard Worker
332*61046927SAndroid Build Coastguard Worker /* The main context: */
333*61046927SAndroid Build Coastguard Worker
334*61046927SAndroid Build Coastguard Worker struct ra_ctx {
335*61046927SAndroid Build Coastguard Worker /* r0.x - r47.w. On a6xx with merged-regs, hr0.x-hr47.w go into the bottom
336*61046927SAndroid Build Coastguard Worker * half of this file too.
337*61046927SAndroid Build Coastguard Worker */
338*61046927SAndroid Build Coastguard Worker struct ra_file full;
339*61046927SAndroid Build Coastguard Worker
340*61046927SAndroid Build Coastguard Worker /* hr0.x - hr63.w, only used without merged-regs. */
341*61046927SAndroid Build Coastguard Worker struct ra_file half;
342*61046927SAndroid Build Coastguard Worker
343*61046927SAndroid Build Coastguard Worker /* Shared regs. */
344*61046927SAndroid Build Coastguard Worker struct ra_file shared;
345*61046927SAndroid Build Coastguard Worker
346*61046927SAndroid Build Coastguard Worker struct ir3_liveness *live;
347*61046927SAndroid Build Coastguard Worker
348*61046927SAndroid Build Coastguard Worker struct ir3_block *block;
349*61046927SAndroid Build Coastguard Worker
350*61046927SAndroid Build Coastguard Worker const struct ir3_compiler *compiler;
351*61046927SAndroid Build Coastguard Worker gl_shader_stage stage;
352*61046927SAndroid Build Coastguard Worker
353*61046927SAndroid Build Coastguard Worker /* Pending moves of top-level intervals that will be emitted once we're
354*61046927SAndroid Build Coastguard Worker * finished:
355*61046927SAndroid Build Coastguard Worker */
356*61046927SAndroid Build Coastguard Worker DECLARE_ARRAY(struct ra_parallel_copy, parallel_copies);
357*61046927SAndroid Build Coastguard Worker
358*61046927SAndroid Build Coastguard Worker struct ra_interval *intervals;
359*61046927SAndroid Build Coastguard Worker struct ra_block_state *blocks;
360*61046927SAndroid Build Coastguard Worker
361*61046927SAndroid Build Coastguard Worker bool merged_regs;
362*61046927SAndroid Build Coastguard Worker };
363*61046927SAndroid Build Coastguard Worker
364*61046927SAndroid Build Coastguard Worker #define foreach_interval(interval, file) \
365*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals, \
366*61046927SAndroid Build Coastguard Worker physreg_node)
367*61046927SAndroid Build Coastguard Worker #define foreach_interval_rev(interval, file) \
368*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals, \
369*61046927SAndroid Build Coastguard Worker physreg_node)
370*61046927SAndroid Build Coastguard Worker #define foreach_interval_safe(interval, file) \
371*61046927SAndroid Build Coastguard Worker rb_tree_foreach_safe (struct ra_interval, interval, \
372*61046927SAndroid Build Coastguard Worker &(file)->physreg_intervals, physreg_node)
373*61046927SAndroid Build Coastguard Worker #define foreach_interval_rev_safe(interval, file) \
374*61046927SAndroid Build Coastguard Worker rb_tree_foreach_rev_safe(struct ra_interval, interval, \
375*61046927SAndroid Build Coastguard Worker &(file)->physreg_intervals, physreg_node)
376*61046927SAndroid Build Coastguard Worker
377*61046927SAndroid Build Coastguard Worker static struct ra_interval *
rb_node_to_interval(struct rb_node * node)378*61046927SAndroid Build Coastguard Worker rb_node_to_interval(struct rb_node *node)
379*61046927SAndroid Build Coastguard Worker {
380*61046927SAndroid Build Coastguard Worker return rb_node_data(struct ra_interval, node, physreg_node);
381*61046927SAndroid Build Coastguard Worker }
382*61046927SAndroid Build Coastguard Worker
383*61046927SAndroid Build Coastguard Worker static const struct ra_interval *
rb_node_to_interval_const(const struct rb_node * node)384*61046927SAndroid Build Coastguard Worker rb_node_to_interval_const(const struct rb_node *node)
385*61046927SAndroid Build Coastguard Worker {
386*61046927SAndroid Build Coastguard Worker return rb_node_data(struct ra_interval, node, physreg_node);
387*61046927SAndroid Build Coastguard Worker }
388*61046927SAndroid Build Coastguard Worker
389*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_next(struct ra_interval * interval)390*61046927SAndroid Build Coastguard Worker ra_interval_next(struct ra_interval *interval)
391*61046927SAndroid Build Coastguard Worker {
392*61046927SAndroid Build Coastguard Worker struct rb_node *next = rb_node_next(&interval->physreg_node);
393*61046927SAndroid Build Coastguard Worker return next ? rb_node_to_interval(next) : NULL;
394*61046927SAndroid Build Coastguard Worker }
395*61046927SAndroid Build Coastguard Worker
396*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_next_or_null(struct ra_interval * interval)397*61046927SAndroid Build Coastguard Worker ra_interval_next_or_null(struct ra_interval *interval)
398*61046927SAndroid Build Coastguard Worker {
399*61046927SAndroid Build Coastguard Worker return interval ? ra_interval_next(interval) : NULL;
400*61046927SAndroid Build Coastguard Worker }
401*61046927SAndroid Build Coastguard Worker
402*61046927SAndroid Build Coastguard Worker static int
ra_interval_cmp(const struct rb_node * node,const void * data)403*61046927SAndroid Build Coastguard Worker ra_interval_cmp(const struct rb_node *node, const void *data)
404*61046927SAndroid Build Coastguard Worker {
405*61046927SAndroid Build Coastguard Worker physreg_t reg = *(const physreg_t *)data;
406*61046927SAndroid Build Coastguard Worker const struct ra_interval *interval = rb_node_to_interval_const(node);
407*61046927SAndroid Build Coastguard Worker if (interval->physreg_start > reg)
408*61046927SAndroid Build Coastguard Worker return -1;
409*61046927SAndroid Build Coastguard Worker else if (interval->physreg_end <= reg)
410*61046927SAndroid Build Coastguard Worker return 1;
411*61046927SAndroid Build Coastguard Worker else
412*61046927SAndroid Build Coastguard Worker return 0;
413*61046927SAndroid Build Coastguard Worker }
414*61046927SAndroid Build Coastguard Worker
415*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_search_sloppy(struct rb_tree * tree,physreg_t reg)416*61046927SAndroid Build Coastguard Worker ra_interval_search_sloppy(struct rb_tree *tree, physreg_t reg)
417*61046927SAndroid Build Coastguard Worker {
418*61046927SAndroid Build Coastguard Worker struct rb_node *node = rb_tree_search_sloppy(tree, ®, ra_interval_cmp);
419*61046927SAndroid Build Coastguard Worker return node ? rb_node_to_interval(node) : NULL;
420*61046927SAndroid Build Coastguard Worker }
421*61046927SAndroid Build Coastguard Worker
422*61046927SAndroid Build Coastguard Worker /* Get the interval covering the reg, or the closest to the right if it
423*61046927SAndroid Build Coastguard Worker * doesn't exist.
424*61046927SAndroid Build Coastguard Worker */
425*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_interval_search_right(struct rb_tree * tree,physreg_t reg)426*61046927SAndroid Build Coastguard Worker ra_interval_search_right(struct rb_tree *tree, physreg_t reg)
427*61046927SAndroid Build Coastguard Worker {
428*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = ra_interval_search_sloppy(tree, reg);
429*61046927SAndroid Build Coastguard Worker if (!interval) {
430*61046927SAndroid Build Coastguard Worker return NULL;
431*61046927SAndroid Build Coastguard Worker } else if (interval->physreg_end > reg) {
432*61046927SAndroid Build Coastguard Worker return interval;
433*61046927SAndroid Build Coastguard Worker } else {
434*61046927SAndroid Build Coastguard Worker /* There is no interval covering reg, and ra_file_search_sloppy()
435*61046927SAndroid Build Coastguard Worker * returned the closest range to the left, so the next interval to the
436*61046927SAndroid Build Coastguard Worker * right should be the closest to the right.
437*61046927SAndroid Build Coastguard Worker */
438*61046927SAndroid Build Coastguard Worker return ra_interval_next_or_null(interval);
439*61046927SAndroid Build Coastguard Worker }
440*61046927SAndroid Build Coastguard Worker }
441*61046927SAndroid Build Coastguard Worker
442*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ra_file_search_right(struct ra_file * file,physreg_t reg)443*61046927SAndroid Build Coastguard Worker ra_file_search_right(struct ra_file *file, physreg_t reg)
444*61046927SAndroid Build Coastguard Worker {
445*61046927SAndroid Build Coastguard Worker return ra_interval_search_right(&file->physreg_intervals, reg);
446*61046927SAndroid Build Coastguard Worker }
447*61046927SAndroid Build Coastguard Worker
448*61046927SAndroid Build Coastguard Worker static int
ra_interval_insert_cmp(const struct rb_node * _a,const struct rb_node * _b)449*61046927SAndroid Build Coastguard Worker ra_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b)
450*61046927SAndroid Build Coastguard Worker {
451*61046927SAndroid Build Coastguard Worker const struct ra_interval *a = rb_node_to_interval_const(_a);
452*61046927SAndroid Build Coastguard Worker const struct ra_interval *b = rb_node_to_interval_const(_b);
453*61046927SAndroid Build Coastguard Worker return b->physreg_start - a->physreg_start;
454*61046927SAndroid Build Coastguard Worker }
455*61046927SAndroid Build Coastguard Worker
456*61046927SAndroid Build Coastguard Worker static struct ra_interval *
ir3_reg_interval_to_ra_interval(struct ir3_reg_interval * interval)457*61046927SAndroid Build Coastguard Worker ir3_reg_interval_to_ra_interval(struct ir3_reg_interval *interval)
458*61046927SAndroid Build Coastguard Worker {
459*61046927SAndroid Build Coastguard Worker return rb_node_data(struct ra_interval, interval, interval);
460*61046927SAndroid Build Coastguard Worker }
461*61046927SAndroid Build Coastguard Worker
462*61046927SAndroid Build Coastguard Worker static struct ra_file *
ir3_reg_ctx_to_file(struct ir3_reg_ctx * ctx)463*61046927SAndroid Build Coastguard Worker ir3_reg_ctx_to_file(struct ir3_reg_ctx *ctx)
464*61046927SAndroid Build Coastguard Worker {
465*61046927SAndroid Build Coastguard Worker return rb_node_data(struct ra_file, ctx, reg_ctx);
466*61046927SAndroid Build Coastguard Worker }
467*61046927SAndroid Build Coastguard Worker
468*61046927SAndroid Build Coastguard Worker static void
interval_add(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * _interval)469*61046927SAndroid Build Coastguard Worker interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval)
470*61046927SAndroid Build Coastguard Worker {
471*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval);
472*61046927SAndroid Build Coastguard Worker struct ra_file *file = ir3_reg_ctx_to_file(ctx);
473*61046927SAndroid Build Coastguard Worker
474*61046927SAndroid Build Coastguard Worker /* We can assume in this case that physreg_start/physreg_end is already
475*61046927SAndroid Build Coastguard Worker * initialized.
476*61046927SAndroid Build Coastguard Worker */
477*61046927SAndroid Build Coastguard Worker for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
478*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(file->available, i);
479*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(file->available_to_evict, i);
480*61046927SAndroid Build Coastguard Worker }
481*61046927SAndroid Build Coastguard Worker
482*61046927SAndroid Build Coastguard Worker rb_tree_insert(&file->physreg_intervals, &interval->physreg_node,
483*61046927SAndroid Build Coastguard Worker ra_interval_insert_cmp);
484*61046927SAndroid Build Coastguard Worker }
485*61046927SAndroid Build Coastguard Worker
486*61046927SAndroid Build Coastguard Worker static void
interval_delete(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * _interval)487*61046927SAndroid Build Coastguard Worker interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval)
488*61046927SAndroid Build Coastguard Worker {
489*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval);
490*61046927SAndroid Build Coastguard Worker struct ra_file *file = ir3_reg_ctx_to_file(ctx);
491*61046927SAndroid Build Coastguard Worker
492*61046927SAndroid Build Coastguard Worker for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
493*61046927SAndroid Build Coastguard Worker BITSET_SET(file->available, i);
494*61046927SAndroid Build Coastguard Worker BITSET_SET(file->available_to_evict, i);
495*61046927SAndroid Build Coastguard Worker }
496*61046927SAndroid Build Coastguard Worker
497*61046927SAndroid Build Coastguard Worker rb_tree_remove(&file->physreg_intervals, &interval->physreg_node);
498*61046927SAndroid Build Coastguard Worker }
499*61046927SAndroid Build Coastguard Worker
500*61046927SAndroid Build Coastguard Worker static void
interval_readd(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * _parent,struct ir3_reg_interval * _child)501*61046927SAndroid Build Coastguard Worker interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_parent,
502*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *_child)
503*61046927SAndroid Build Coastguard Worker {
504*61046927SAndroid Build Coastguard Worker struct ra_interval *parent = ir3_reg_interval_to_ra_interval(_parent);
505*61046927SAndroid Build Coastguard Worker struct ra_interval *child = ir3_reg_interval_to_ra_interval(_child);
506*61046927SAndroid Build Coastguard Worker
507*61046927SAndroid Build Coastguard Worker child->physreg_start =
508*61046927SAndroid Build Coastguard Worker parent->physreg_start + (child->interval.reg->interval_start -
509*61046927SAndroid Build Coastguard Worker parent->interval.reg->interval_start);
510*61046927SAndroid Build Coastguard Worker child->physreg_end =
511*61046927SAndroid Build Coastguard Worker child->physreg_start +
512*61046927SAndroid Build Coastguard Worker (child->interval.reg->interval_end - child->interval.reg->interval_start);
513*61046927SAndroid Build Coastguard Worker
514*61046927SAndroid Build Coastguard Worker interval_add(ctx, _child);
515*61046927SAndroid Build Coastguard Worker }
516*61046927SAndroid Build Coastguard Worker
517*61046927SAndroid Build Coastguard Worker static void
ra_file_init(struct ra_file * file)518*61046927SAndroid Build Coastguard Worker ra_file_init(struct ra_file *file)
519*61046927SAndroid Build Coastguard Worker {
520*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < file->size; i++) {
521*61046927SAndroid Build Coastguard Worker BITSET_SET(file->available, i);
522*61046927SAndroid Build Coastguard Worker BITSET_SET(file->available_to_evict, i);
523*61046927SAndroid Build Coastguard Worker }
524*61046927SAndroid Build Coastguard Worker
525*61046927SAndroid Build Coastguard Worker rb_tree_init(&file->reg_ctx.intervals);
526*61046927SAndroid Build Coastguard Worker rb_tree_init(&file->physreg_intervals);
527*61046927SAndroid Build Coastguard Worker
528*61046927SAndroid Build Coastguard Worker file->reg_ctx.interval_add = interval_add;
529*61046927SAndroid Build Coastguard Worker file->reg_ctx.interval_delete = interval_delete;
530*61046927SAndroid Build Coastguard Worker file->reg_ctx.interval_readd = interval_readd;
531*61046927SAndroid Build Coastguard Worker }
532*61046927SAndroid Build Coastguard Worker
533*61046927SAndroid Build Coastguard Worker static void
ra_file_insert(struct ra_file * file,struct ra_interval * interval)534*61046927SAndroid Build Coastguard Worker ra_file_insert(struct ra_file *file, struct ra_interval *interval)
535*61046927SAndroid Build Coastguard Worker {
536*61046927SAndroid Build Coastguard Worker assert(interval->physreg_start < interval->physreg_end);
537*61046927SAndroid Build Coastguard Worker assert(interval->physreg_end <= file->size);
538*61046927SAndroid Build Coastguard Worker if (interval->interval.reg->flags & IR3_REG_HALF)
539*61046927SAndroid Build Coastguard Worker assert(interval->physreg_end <= RA_HALF_SIZE);
540*61046927SAndroid Build Coastguard Worker
541*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert(&file->reg_ctx, &interval->interval);
542*61046927SAndroid Build Coastguard Worker }
543*61046927SAndroid Build Coastguard Worker
544*61046927SAndroid Build Coastguard Worker static void
ra_file_remove(struct ra_file * file,struct ra_interval * interval)545*61046927SAndroid Build Coastguard Worker ra_file_remove(struct ra_file *file, struct ra_interval *interval)
546*61046927SAndroid Build Coastguard Worker {
547*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove(&file->reg_ctx, &interval->interval);
548*61046927SAndroid Build Coastguard Worker }
549*61046927SAndroid Build Coastguard Worker
550*61046927SAndroid Build Coastguard Worker static void
ra_file_mark_killed(struct ra_file * file,struct ra_interval * interval)551*61046927SAndroid Build Coastguard Worker ra_file_mark_killed(struct ra_file *file, struct ra_interval *interval)
552*61046927SAndroid Build Coastguard Worker {
553*61046927SAndroid Build Coastguard Worker assert(!interval->interval.parent);
554*61046927SAndroid Build Coastguard Worker
555*61046927SAndroid Build Coastguard Worker for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
556*61046927SAndroid Build Coastguard Worker BITSET_SET(file->available, i);
557*61046927SAndroid Build Coastguard Worker }
558*61046927SAndroid Build Coastguard Worker
559*61046927SAndroid Build Coastguard Worker interval->is_killed = true;
560*61046927SAndroid Build Coastguard Worker }
561*61046927SAndroid Build Coastguard Worker
562*61046927SAndroid Build Coastguard Worker static void
ra_file_unmark_killed(struct ra_file * file,struct ra_interval * interval)563*61046927SAndroid Build Coastguard Worker ra_file_unmark_killed(struct ra_file *file, struct ra_interval *interval)
564*61046927SAndroid Build Coastguard Worker {
565*61046927SAndroid Build Coastguard Worker assert(!interval->interval.parent);
566*61046927SAndroid Build Coastguard Worker
567*61046927SAndroid Build Coastguard Worker for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
568*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(file->available, i);
569*61046927SAndroid Build Coastguard Worker }
570*61046927SAndroid Build Coastguard Worker
571*61046927SAndroid Build Coastguard Worker interval->is_killed = false;
572*61046927SAndroid Build Coastguard Worker }
573*61046927SAndroid Build Coastguard Worker
574*61046927SAndroid Build Coastguard Worker static physreg_t
ra_interval_get_physreg(const struct ra_interval * interval)575*61046927SAndroid Build Coastguard Worker ra_interval_get_physreg(const struct ra_interval *interval)
576*61046927SAndroid Build Coastguard Worker {
577*61046927SAndroid Build Coastguard Worker unsigned child_start = interval->interval.reg->interval_start;
578*61046927SAndroid Build Coastguard Worker
579*61046927SAndroid Build Coastguard Worker while (interval->interval.parent) {
580*61046927SAndroid Build Coastguard Worker interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
581*61046927SAndroid Build Coastguard Worker }
582*61046927SAndroid Build Coastguard Worker
583*61046927SAndroid Build Coastguard Worker return interval->physreg_start +
584*61046927SAndroid Build Coastguard Worker (child_start - interval->interval.reg->interval_start);
585*61046927SAndroid Build Coastguard Worker }
586*61046927SAndroid Build Coastguard Worker
587*61046927SAndroid Build Coastguard Worker static unsigned
ra_interval_get_num(const struct ra_interval * interval)588*61046927SAndroid Build Coastguard Worker ra_interval_get_num(const struct ra_interval *interval)
589*61046927SAndroid Build Coastguard Worker {
590*61046927SAndroid Build Coastguard Worker return ra_physreg_to_num(ra_interval_get_physreg(interval),
591*61046927SAndroid Build Coastguard Worker interval->interval.reg->flags);
592*61046927SAndroid Build Coastguard Worker }
593*61046927SAndroid Build Coastguard Worker
594*61046927SAndroid Build Coastguard Worker static void
ra_interval_init(struct ra_interval * interval,struct ir3_register * reg)595*61046927SAndroid Build Coastguard Worker ra_interval_init(struct ra_interval *interval, struct ir3_register *reg)
596*61046927SAndroid Build Coastguard Worker {
597*61046927SAndroid Build Coastguard Worker ir3_reg_interval_init(&interval->interval, reg);
598*61046927SAndroid Build Coastguard Worker interval->is_killed = false;
599*61046927SAndroid Build Coastguard Worker interval->frozen = false;
600*61046927SAndroid Build Coastguard Worker }
601*61046927SAndroid Build Coastguard Worker
602*61046927SAndroid Build Coastguard Worker static void
ra_interval_dump(struct log_stream * stream,struct ra_interval * interval)603*61046927SAndroid Build Coastguard Worker ra_interval_dump(struct log_stream *stream, struct ra_interval *interval)
604*61046927SAndroid Build Coastguard Worker {
605*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "physreg %u ", interval->physreg_start);
606*61046927SAndroid Build Coastguard Worker
607*61046927SAndroid Build Coastguard Worker ir3_reg_interval_dump(stream, &interval->interval);
608*61046927SAndroid Build Coastguard Worker }
609*61046927SAndroid Build Coastguard Worker
610*61046927SAndroid Build Coastguard Worker static void
ra_file_dump(struct log_stream * stream,struct ra_file * file)611*61046927SAndroid Build Coastguard Worker ra_file_dump(struct log_stream *stream, struct ra_file *file)
612*61046927SAndroid Build Coastguard Worker {
613*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
614*61046927SAndroid Build Coastguard Worker physreg_node) {
615*61046927SAndroid Build Coastguard Worker ra_interval_dump(stream, interval);
616*61046927SAndroid Build Coastguard Worker }
617*61046927SAndroid Build Coastguard Worker
618*61046927SAndroid Build Coastguard Worker unsigned start, end;
619*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "available:\n");
620*61046927SAndroid Build Coastguard Worker BITSET_FOREACH_RANGE (start, end, file->available, file->size) {
621*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "%u-%u ", start, end);
622*61046927SAndroid Build Coastguard Worker }
623*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "\n");
624*61046927SAndroid Build Coastguard Worker
625*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "available to evict:\n");
626*61046927SAndroid Build Coastguard Worker BITSET_FOREACH_RANGE (start, end, file->available_to_evict, file->size) {
627*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "%u-%u ", start, end);
628*61046927SAndroid Build Coastguard Worker }
629*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "\n");
630*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "start: %u\n", file->start);
631*61046927SAndroid Build Coastguard Worker }
632*61046927SAndroid Build Coastguard Worker
633*61046927SAndroid Build Coastguard Worker static void
ra_ctx_dump(struct ra_ctx * ctx)634*61046927SAndroid Build Coastguard Worker ra_ctx_dump(struct ra_ctx *ctx)
635*61046927SAndroid Build Coastguard Worker {
636*61046927SAndroid Build Coastguard Worker struct log_stream *stream = mesa_log_streami();
637*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "full:\n");
638*61046927SAndroid Build Coastguard Worker ra_file_dump(stream, &ctx->full);
639*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "half:\n");
640*61046927SAndroid Build Coastguard Worker ra_file_dump(stream, &ctx->half);
641*61046927SAndroid Build Coastguard Worker mesa_log_stream_printf(stream, "shared:\n");
642*61046927SAndroid Build Coastguard Worker ra_file_dump(stream, &ctx->shared);
643*61046927SAndroid Build Coastguard Worker mesa_log_stream_destroy(stream);
644*61046927SAndroid Build Coastguard Worker }
645*61046927SAndroid Build Coastguard Worker
646*61046927SAndroid Build Coastguard Worker static unsigned
reg_file_size(struct ra_file * file,struct ir3_register * reg)647*61046927SAndroid Build Coastguard Worker reg_file_size(struct ra_file *file, struct ir3_register *reg)
648*61046927SAndroid Build Coastguard Worker {
649*61046927SAndroid Build Coastguard Worker /* Half-regs can only take up the first half of the combined regfile */
650*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_HALF) {
651*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_SHARED)
652*61046927SAndroid Build Coastguard Worker return RA_SHARED_HALF_SIZE;
653*61046927SAndroid Build Coastguard Worker else
654*61046927SAndroid Build Coastguard Worker return MIN2(file->size, RA_HALF_SIZE);
655*61046927SAndroid Build Coastguard Worker } else {
656*61046927SAndroid Build Coastguard Worker return file->size;
657*61046927SAndroid Build Coastguard Worker }
658*61046927SAndroid Build Coastguard Worker }
659*61046927SAndroid Build Coastguard Worker
660*61046927SAndroid Build Coastguard Worker /* ra_pop_interval/ra_push_interval provide an API to shuffle around multiple
661*61046927SAndroid Build Coastguard Worker * top-level intervals at once. Pop multiple intervals, then push them back in
662*61046927SAndroid Build Coastguard Worker * any order.
663*61046927SAndroid Build Coastguard Worker */
664*61046927SAndroid Build Coastguard Worker
665*61046927SAndroid Build Coastguard Worker struct ra_removed_interval {
666*61046927SAndroid Build Coastguard Worker struct ra_interval *interval;
667*61046927SAndroid Build Coastguard Worker unsigned size;
668*61046927SAndroid Build Coastguard Worker };
669*61046927SAndroid Build Coastguard Worker
670*61046927SAndroid Build Coastguard Worker static struct ra_removed_interval
ra_pop_interval(struct ra_ctx * ctx,struct ra_file * file,struct ra_interval * interval)671*61046927SAndroid Build Coastguard Worker ra_pop_interval(struct ra_ctx *ctx, struct ra_file *file,
672*61046927SAndroid Build Coastguard Worker struct ra_interval *interval)
673*61046927SAndroid Build Coastguard Worker {
674*61046927SAndroid Build Coastguard Worker assert(!interval->interval.parent);
675*61046927SAndroid Build Coastguard Worker /* shared live splitting is not allowed! */
676*61046927SAndroid Build Coastguard Worker assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
677*61046927SAndroid Build Coastguard Worker
678*61046927SAndroid Build Coastguard Worker /* Check if we've already moved this reg before */
679*61046927SAndroid Build Coastguard Worker unsigned pcopy_index;
680*61046927SAndroid Build Coastguard Worker for (pcopy_index = 0; pcopy_index < ctx->parallel_copies_count;
681*61046927SAndroid Build Coastguard Worker pcopy_index++) {
682*61046927SAndroid Build Coastguard Worker if (ctx->parallel_copies[pcopy_index].interval == interval)
683*61046927SAndroid Build Coastguard Worker break;
684*61046927SAndroid Build Coastguard Worker }
685*61046927SAndroid Build Coastguard Worker
686*61046927SAndroid Build Coastguard Worker if (pcopy_index == ctx->parallel_copies_count) {
687*61046927SAndroid Build Coastguard Worker array_insert(ctx, ctx->parallel_copies,
688*61046927SAndroid Build Coastguard Worker (struct ra_parallel_copy){
689*61046927SAndroid Build Coastguard Worker .interval = interval,
690*61046927SAndroid Build Coastguard Worker .src = interval->physreg_start,
691*61046927SAndroid Build Coastguard Worker });
692*61046927SAndroid Build Coastguard Worker }
693*61046927SAndroid Build Coastguard Worker
694*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove_temp(&file->reg_ctx, &interval->interval);
695*61046927SAndroid Build Coastguard Worker
696*61046927SAndroid Build Coastguard Worker return (struct ra_removed_interval){
697*61046927SAndroid Build Coastguard Worker .interval = interval,
698*61046927SAndroid Build Coastguard Worker .size = interval->physreg_end - interval->physreg_start,
699*61046927SAndroid Build Coastguard Worker };
700*61046927SAndroid Build Coastguard Worker }
701*61046927SAndroid Build Coastguard Worker
702*61046927SAndroid Build Coastguard Worker static void
ra_push_interval(struct ra_ctx * ctx,struct ra_file * file,const struct ra_removed_interval * removed,physreg_t dst)703*61046927SAndroid Build Coastguard Worker ra_push_interval(struct ra_ctx *ctx, struct ra_file *file,
704*61046927SAndroid Build Coastguard Worker const struct ra_removed_interval *removed, physreg_t dst)
705*61046927SAndroid Build Coastguard Worker {
706*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = removed->interval;
707*61046927SAndroid Build Coastguard Worker
708*61046927SAndroid Build Coastguard Worker interval->physreg_start = dst;
709*61046927SAndroid Build Coastguard Worker interval->physreg_end = dst + removed->size;
710*61046927SAndroid Build Coastguard Worker
711*61046927SAndroid Build Coastguard Worker assert(interval->physreg_end <= file->size);
712*61046927SAndroid Build Coastguard Worker if (interval->interval.reg->flags & IR3_REG_HALF)
713*61046927SAndroid Build Coastguard Worker assert(interval->physreg_end <= RA_HALF_SIZE);
714*61046927SAndroid Build Coastguard Worker
715*61046927SAndroid Build Coastguard Worker ir3_reg_interval_reinsert(&file->reg_ctx, &interval->interval);
716*61046927SAndroid Build Coastguard Worker }
717*61046927SAndroid Build Coastguard Worker
718*61046927SAndroid Build Coastguard Worker /* Pick up the interval and place it at "dst". */
719*61046927SAndroid Build Coastguard Worker static void
ra_move_interval(struct ra_ctx * ctx,struct ra_file * file,struct ra_interval * interval,physreg_t dst)720*61046927SAndroid Build Coastguard Worker ra_move_interval(struct ra_ctx *ctx, struct ra_file *file,
721*61046927SAndroid Build Coastguard Worker struct ra_interval *interval, physreg_t dst)
722*61046927SAndroid Build Coastguard Worker {
723*61046927SAndroid Build Coastguard Worker struct ra_removed_interval temp = ra_pop_interval(ctx, file, interval);
724*61046927SAndroid Build Coastguard Worker ra_push_interval(ctx, file, &temp, dst);
725*61046927SAndroid Build Coastguard Worker }
726*61046927SAndroid Build Coastguard Worker
727*61046927SAndroid Build Coastguard Worker static struct ra_file *
ra_get_file(struct ra_ctx * ctx,struct ir3_register * reg)728*61046927SAndroid Build Coastguard Worker ra_get_file(struct ra_ctx *ctx, struct ir3_register *reg)
729*61046927SAndroid Build Coastguard Worker {
730*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_SHARED)
731*61046927SAndroid Build Coastguard Worker return &ctx->shared;
732*61046927SAndroid Build Coastguard Worker else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
733*61046927SAndroid Build Coastguard Worker return &ctx->full;
734*61046927SAndroid Build Coastguard Worker else
735*61046927SAndroid Build Coastguard Worker return &ctx->half;
736*61046927SAndroid Build Coastguard Worker }
737*61046927SAndroid Build Coastguard Worker
738*61046927SAndroid Build Coastguard Worker
739*61046927SAndroid Build Coastguard Worker /* Returns true if the proposed spot for "dst" or a killed source overlaps a
740*61046927SAndroid Build Coastguard Worker * destination that's been allocated.
741*61046927SAndroid Build Coastguard Worker */
742*61046927SAndroid Build Coastguard Worker static bool
check_dst_overlap(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * dst,physreg_t start,physreg_t end)743*61046927SAndroid Build Coastguard Worker check_dst_overlap(struct ra_ctx *ctx, struct ra_file *file,
744*61046927SAndroid Build Coastguard Worker struct ir3_register *dst, physreg_t start,
745*61046927SAndroid Build Coastguard Worker physreg_t end)
746*61046927SAndroid Build Coastguard Worker {
747*61046927SAndroid Build Coastguard Worker struct ir3_instruction *instr = dst->instr;
748*61046927SAndroid Build Coastguard Worker
749*61046927SAndroid Build Coastguard Worker ra_foreach_dst (other_dst, instr) {
750*61046927SAndroid Build Coastguard Worker /* We assume only destinations before the current one have been allocated.
751*61046927SAndroid Build Coastguard Worker */
752*61046927SAndroid Build Coastguard Worker if (other_dst == dst)
753*61046927SAndroid Build Coastguard Worker break;
754*61046927SAndroid Build Coastguard Worker
755*61046927SAndroid Build Coastguard Worker if (ra_get_file(ctx, other_dst) != file)
756*61046927SAndroid Build Coastguard Worker continue;
757*61046927SAndroid Build Coastguard Worker
758*61046927SAndroid Build Coastguard Worker struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
759*61046927SAndroid Build Coastguard Worker assert(!other_interval->interval.parent);
760*61046927SAndroid Build Coastguard Worker physreg_t other_start = other_interval->physreg_start;
761*61046927SAndroid Build Coastguard Worker physreg_t other_end = other_interval->physreg_end;
762*61046927SAndroid Build Coastguard Worker
763*61046927SAndroid Build Coastguard Worker if (other_end > start && end > other_start)
764*61046927SAndroid Build Coastguard Worker return true;
765*61046927SAndroid Build Coastguard Worker }
766*61046927SAndroid Build Coastguard Worker
767*61046927SAndroid Build Coastguard Worker return false;
768*61046927SAndroid Build Coastguard Worker }
769*61046927SAndroid Build Coastguard Worker
770*61046927SAndroid Build Coastguard Worker /* True if the destination is "early-clobber," meaning that it cannot be
771*61046927SAndroid Build Coastguard Worker * allocated over killed sources. Some destinations always require it, but it
772*61046927SAndroid Build Coastguard Worker * also is implicitly true for tied destinations whose source is live-through.
773*61046927SAndroid Build Coastguard Worker * If the source is killed, then we skip allocating a register for the
774*61046927SAndroid Build Coastguard Worker * destination altogether so we don't need to worry about that case here.
775*61046927SAndroid Build Coastguard Worker */
776*61046927SAndroid Build Coastguard Worker static bool
is_early_clobber(struct ir3_register * reg)777*61046927SAndroid Build Coastguard Worker is_early_clobber(struct ir3_register *reg)
778*61046927SAndroid Build Coastguard Worker {
779*61046927SAndroid Build Coastguard Worker return (reg->flags & IR3_REG_EARLY_CLOBBER) || reg->tied;
780*61046927SAndroid Build Coastguard Worker }
781*61046927SAndroid Build Coastguard Worker
782*61046927SAndroid Build Coastguard Worker static bool
get_reg_specified(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg,physreg_t physreg,bool is_source)783*61046927SAndroid Build Coastguard Worker get_reg_specified(struct ra_ctx *ctx, struct ra_file *file,
784*61046927SAndroid Build Coastguard Worker struct ir3_register *reg, physreg_t physreg, bool is_source)
785*61046927SAndroid Build Coastguard Worker {
786*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < reg_size(reg); i++) {
787*61046927SAndroid Build Coastguard Worker if (!BITSET_TEST(is_early_clobber(reg) || is_source ?
788*61046927SAndroid Build Coastguard Worker file->available_to_evict : file->available,
789*61046927SAndroid Build Coastguard Worker physreg + i))
790*61046927SAndroid Build Coastguard Worker return false;
791*61046927SAndroid Build Coastguard Worker }
792*61046927SAndroid Build Coastguard Worker
793*61046927SAndroid Build Coastguard Worker if (!is_source &&
794*61046927SAndroid Build Coastguard Worker check_dst_overlap(ctx, file, reg, physreg, physreg + reg_size(reg)))
795*61046927SAndroid Build Coastguard Worker return false;
796*61046927SAndroid Build Coastguard Worker
797*61046927SAndroid Build Coastguard Worker return true;
798*61046927SAndroid Build Coastguard Worker }
799*61046927SAndroid Build Coastguard Worker
800*61046927SAndroid Build Coastguard Worker /* Try to evict any registers conflicting with the proposed spot "physreg" for
801*61046927SAndroid Build Coastguard Worker * "reg". That is, move them to other places so that we can allocate "physreg"
802*61046927SAndroid Build Coastguard Worker * here.
803*61046927SAndroid Build Coastguard Worker */
804*61046927SAndroid Build Coastguard Worker
805*61046927SAndroid Build Coastguard Worker static bool
try_evict_regs(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg,physreg_t physreg,unsigned * _eviction_count,bool is_source,bool speculative)806*61046927SAndroid Build Coastguard Worker try_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
807*61046927SAndroid Build Coastguard Worker struct ir3_register *reg, physreg_t physreg,
808*61046927SAndroid Build Coastguard Worker unsigned *_eviction_count, bool is_source, bool speculative)
809*61046927SAndroid Build Coastguard Worker {
810*61046927SAndroid Build Coastguard Worker BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE);
811*61046927SAndroid Build Coastguard Worker memcpy(available_to_evict, file->available_to_evict,
812*61046927SAndroid Build Coastguard Worker sizeof(available_to_evict));
813*61046927SAndroid Build Coastguard Worker
814*61046927SAndroid Build Coastguard Worker BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
815*61046927SAndroid Build Coastguard Worker memcpy(available, file->available, sizeof(available));
816*61046927SAndroid Build Coastguard Worker
817*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < reg_size(reg); i++) {
818*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(available_to_evict, physreg + i);
819*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(available, physreg + i);
820*61046927SAndroid Build Coastguard Worker }
821*61046927SAndroid Build Coastguard Worker
822*61046927SAndroid Build Coastguard Worker unsigned eviction_count = 0;
823*61046927SAndroid Build Coastguard Worker /* Iterate over each range conflicting with physreg */
824*61046927SAndroid Build Coastguard Worker for (struct ra_interval *conflicting = ra_file_search_right(file, physreg),
825*61046927SAndroid Build Coastguard Worker *next = ra_interval_next_or_null(conflicting);
826*61046927SAndroid Build Coastguard Worker conflicting != NULL &&
827*61046927SAndroid Build Coastguard Worker conflicting->physreg_start < physreg + reg_size(reg);
828*61046927SAndroid Build Coastguard Worker conflicting = next, next = ra_interval_next_or_null(next)) {
829*61046927SAndroid Build Coastguard Worker if (!is_early_clobber(reg) && !is_source && conflicting->is_killed)
830*61046927SAndroid Build Coastguard Worker continue;
831*61046927SAndroid Build Coastguard Worker
832*61046927SAndroid Build Coastguard Worker if (conflicting->frozen) {
833*61046927SAndroid Build Coastguard Worker assert(speculative);
834*61046927SAndroid Build Coastguard Worker return false;
835*61046927SAndroid Build Coastguard Worker }
836*61046927SAndroid Build Coastguard Worker
837*61046927SAndroid Build Coastguard Worker unsigned conflicting_file_size =
838*61046927SAndroid Build Coastguard Worker reg_file_size(file, conflicting->interval.reg);
839*61046927SAndroid Build Coastguard Worker unsigned avail_start, avail_end;
840*61046927SAndroid Build Coastguard Worker bool evicted = false;
841*61046927SAndroid Build Coastguard Worker BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict,
842*61046927SAndroid Build Coastguard Worker conflicting_file_size) {
843*61046927SAndroid Build Coastguard Worker unsigned size = avail_end - avail_start;
844*61046927SAndroid Build Coastguard Worker
845*61046927SAndroid Build Coastguard Worker /* non-half registers must be aligned */
846*61046927SAndroid Build Coastguard Worker if (!(conflicting->interval.reg->flags & IR3_REG_HALF) &&
847*61046927SAndroid Build Coastguard Worker avail_start % 2 == 1) {
848*61046927SAndroid Build Coastguard Worker avail_start++;
849*61046927SAndroid Build Coastguard Worker size--;
850*61046927SAndroid Build Coastguard Worker }
851*61046927SAndroid Build Coastguard Worker
852*61046927SAndroid Build Coastguard Worker unsigned conflicting_size =
853*61046927SAndroid Build Coastguard Worker conflicting->physreg_end - conflicting->physreg_start;
854*61046927SAndroid Build Coastguard Worker if (size >= conflicting_size &&
855*61046927SAndroid Build Coastguard Worker (is_source ||
856*61046927SAndroid Build Coastguard Worker !check_dst_overlap(ctx, file, reg, avail_start,
857*61046927SAndroid Build Coastguard Worker avail_start + conflicting_size))) {
858*61046927SAndroid Build Coastguard Worker for (unsigned i = 0;
859*61046927SAndroid Build Coastguard Worker i < conflicting->physreg_end - conflicting->physreg_start; i++)
860*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(available_to_evict, avail_start + i);
861*61046927SAndroid Build Coastguard Worker eviction_count +=
862*61046927SAndroid Build Coastguard Worker conflicting->physreg_end - conflicting->physreg_start;
863*61046927SAndroid Build Coastguard Worker if (!speculative)
864*61046927SAndroid Build Coastguard Worker ra_move_interval(ctx, file, conflicting, avail_start);
865*61046927SAndroid Build Coastguard Worker evicted = true;
866*61046927SAndroid Build Coastguard Worker break;
867*61046927SAndroid Build Coastguard Worker }
868*61046927SAndroid Build Coastguard Worker }
869*61046927SAndroid Build Coastguard Worker
870*61046927SAndroid Build Coastguard Worker if (evicted)
871*61046927SAndroid Build Coastguard Worker continue;
872*61046927SAndroid Build Coastguard Worker
873*61046927SAndroid Build Coastguard Worker /* If we couldn't evict this range, but the register we're allocating is
874*61046927SAndroid Build Coastguard Worker * allowed to overlap with a killed range, then we may be able to swap it
875*61046927SAndroid Build Coastguard Worker * with a killed range to acheive the same effect.
876*61046927SAndroid Build Coastguard Worker */
877*61046927SAndroid Build Coastguard Worker if (is_early_clobber(reg) || is_source)
878*61046927SAndroid Build Coastguard Worker return false;
879*61046927SAndroid Build Coastguard Worker
880*61046927SAndroid Build Coastguard Worker foreach_interval (killed, file) {
881*61046927SAndroid Build Coastguard Worker if (!killed->is_killed)
882*61046927SAndroid Build Coastguard Worker continue;
883*61046927SAndroid Build Coastguard Worker
884*61046927SAndroid Build Coastguard Worker if (killed->physreg_end - killed->physreg_start !=
885*61046927SAndroid Build Coastguard Worker conflicting->physreg_end - conflicting->physreg_start)
886*61046927SAndroid Build Coastguard Worker continue;
887*61046927SAndroid Build Coastguard Worker
888*61046927SAndroid Build Coastguard Worker if (killed->physreg_end > conflicting_file_size ||
889*61046927SAndroid Build Coastguard Worker conflicting->physreg_end > reg_file_size(file, killed->interval.reg))
890*61046927SAndroid Build Coastguard Worker continue;
891*61046927SAndroid Build Coastguard Worker
892*61046927SAndroid Build Coastguard Worker /* We can't swap the killed range if it partially/fully overlaps the
893*61046927SAndroid Build Coastguard Worker * space we're trying to allocate or (in speculative mode) if it's
894*61046927SAndroid Build Coastguard Worker * already been swapped and will overlap when we actually evict.
895*61046927SAndroid Build Coastguard Worker */
896*61046927SAndroid Build Coastguard Worker bool killed_available = true;
897*61046927SAndroid Build Coastguard Worker for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
898*61046927SAndroid Build Coastguard Worker if (!BITSET_TEST(available, i)) {
899*61046927SAndroid Build Coastguard Worker killed_available = false;
900*61046927SAndroid Build Coastguard Worker break;
901*61046927SAndroid Build Coastguard Worker }
902*61046927SAndroid Build Coastguard Worker }
903*61046927SAndroid Build Coastguard Worker
904*61046927SAndroid Build Coastguard Worker if (!killed_available)
905*61046927SAndroid Build Coastguard Worker continue;
906*61046927SAndroid Build Coastguard Worker
907*61046927SAndroid Build Coastguard Worker if (check_dst_overlap(ctx, file, reg, killed->physreg_start,
908*61046927SAndroid Build Coastguard Worker killed->physreg_end))
909*61046927SAndroid Build Coastguard Worker continue;
910*61046927SAndroid Build Coastguard Worker
911*61046927SAndroid Build Coastguard Worker /* Check for alignment if one is a full reg */
912*61046927SAndroid Build Coastguard Worker if ((!(killed->interval.reg->flags & IR3_REG_HALF) ||
913*61046927SAndroid Build Coastguard Worker !(conflicting->interval.reg->flags & IR3_REG_HALF)) &&
914*61046927SAndroid Build Coastguard Worker (killed->physreg_start % 2 != 0 ||
915*61046927SAndroid Build Coastguard Worker conflicting->physreg_start % 2 != 0))
916*61046927SAndroid Build Coastguard Worker continue;
917*61046927SAndroid Build Coastguard Worker
918*61046927SAndroid Build Coastguard Worker for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
919*61046927SAndroid Build Coastguard Worker BITSET_CLEAR(available, i);
920*61046927SAndroid Build Coastguard Worker }
921*61046927SAndroid Build Coastguard Worker /* Because this will generate swaps instead of moves, multiply the
922*61046927SAndroid Build Coastguard Worker * cost by 2.
923*61046927SAndroid Build Coastguard Worker */
924*61046927SAndroid Build Coastguard Worker eviction_count += (killed->physreg_end - killed->physreg_start) * 2;
925*61046927SAndroid Build Coastguard Worker if (!speculative) {
926*61046927SAndroid Build Coastguard Worker physreg_t killed_start = killed->physreg_start,
927*61046927SAndroid Build Coastguard Worker conflicting_start = conflicting->physreg_start;
928*61046927SAndroid Build Coastguard Worker struct ra_removed_interval killed_removed =
929*61046927SAndroid Build Coastguard Worker ra_pop_interval(ctx, file, killed);
930*61046927SAndroid Build Coastguard Worker struct ra_removed_interval conflicting_removed =
931*61046927SAndroid Build Coastguard Worker ra_pop_interval(ctx, file, conflicting);
932*61046927SAndroid Build Coastguard Worker ra_push_interval(ctx, file, &killed_removed, conflicting_start);
933*61046927SAndroid Build Coastguard Worker ra_push_interval(ctx, file, &conflicting_removed, killed_start);
934*61046927SAndroid Build Coastguard Worker }
935*61046927SAndroid Build Coastguard Worker
936*61046927SAndroid Build Coastguard Worker evicted = true;
937*61046927SAndroid Build Coastguard Worker break;
938*61046927SAndroid Build Coastguard Worker }
939*61046927SAndroid Build Coastguard Worker
940*61046927SAndroid Build Coastguard Worker if (!evicted)
941*61046927SAndroid Build Coastguard Worker return false;
942*61046927SAndroid Build Coastguard Worker }
943*61046927SAndroid Build Coastguard Worker
944*61046927SAndroid Build Coastguard Worker *_eviction_count = eviction_count;
945*61046927SAndroid Build Coastguard Worker return true;
946*61046927SAndroid Build Coastguard Worker }
947*61046927SAndroid Build Coastguard Worker
948*61046927SAndroid Build Coastguard Worker static int
removed_interval_cmp(const void * _i1,const void * _i2)949*61046927SAndroid Build Coastguard Worker removed_interval_cmp(const void *_i1, const void *_i2)
950*61046927SAndroid Build Coastguard Worker {
951*61046927SAndroid Build Coastguard Worker const struct ra_removed_interval *i1 = _i1;
952*61046927SAndroid Build Coastguard Worker const struct ra_removed_interval *i2 = _i2;
953*61046927SAndroid Build Coastguard Worker
954*61046927SAndroid Build Coastguard Worker /* We sort the registers as follows:
955*61046927SAndroid Build Coastguard Worker *
956*61046927SAndroid Build Coastguard Worker * |------------------------------------------------------------------------------------------|
957*61046927SAndroid Build Coastguard Worker * | | | | | | |
958*61046927SAndroid Build Coastguard Worker * | Half | Half early-clobber | Half | Full | Full early-clobber | Full |
959*61046927SAndroid Build Coastguard Worker * | live-through | destination | killed | killed | destination | live-through |
960*61046927SAndroid Build Coastguard Worker * | | | | | | |
961*61046927SAndroid Build Coastguard Worker * |------------------------------------------------------------------------------------------|
962*61046927SAndroid Build Coastguard Worker * | |
963*61046927SAndroid Build Coastguard Worker * | Destination |
964*61046927SAndroid Build Coastguard Worker * | |
965*61046927SAndroid Build Coastguard Worker * |-----------------|
966*61046927SAndroid Build Coastguard Worker *
967*61046927SAndroid Build Coastguard Worker * Half-registers have to be first so that they stay in the low half of
968*61046927SAndroid Build Coastguard Worker * the register file. Then half and full killed must stay together so that
969*61046927SAndroid Build Coastguard Worker * there's a contiguous range where we can put the register. With this
970*61046927SAndroid Build Coastguard Worker * structure we should be able to accomodate any collection of intervals
971*61046927SAndroid Build Coastguard Worker * such that the total number of half components is within the half limit
972*61046927SAndroid Build Coastguard Worker * and the combined components are within the full limit.
973*61046927SAndroid Build Coastguard Worker */
974*61046927SAndroid Build Coastguard Worker
975*61046927SAndroid Build Coastguard Worker unsigned i1_align = reg_elem_size(i1->interval->interval.reg);
976*61046927SAndroid Build Coastguard Worker unsigned i2_align = reg_elem_size(i2->interval->interval.reg);
977*61046927SAndroid Build Coastguard Worker if (i1_align > i2_align)
978*61046927SAndroid Build Coastguard Worker return 1;
979*61046927SAndroid Build Coastguard Worker if (i1_align < i2_align)
980*61046927SAndroid Build Coastguard Worker return -1;
981*61046927SAndroid Build Coastguard Worker
982*61046927SAndroid Build Coastguard Worker if (i1_align == 1) {
983*61046927SAndroid Build Coastguard Worker if (i2->interval->is_killed)
984*61046927SAndroid Build Coastguard Worker return -1;
985*61046927SAndroid Build Coastguard Worker if (i1->interval->is_killed)
986*61046927SAndroid Build Coastguard Worker return 1;
987*61046927SAndroid Build Coastguard Worker } else {
988*61046927SAndroid Build Coastguard Worker if (i2->interval->is_killed)
989*61046927SAndroid Build Coastguard Worker return 1;
990*61046927SAndroid Build Coastguard Worker if (i1->interval->is_killed)
991*61046927SAndroid Build Coastguard Worker return -1;
992*61046927SAndroid Build Coastguard Worker }
993*61046927SAndroid Build Coastguard Worker
994*61046927SAndroid Build Coastguard Worker return 0;
995*61046927SAndroid Build Coastguard Worker }
996*61046927SAndroid Build Coastguard Worker
997*61046927SAndroid Build Coastguard Worker static int
dsts_cmp(const void * _i1,const void * _i2)998*61046927SAndroid Build Coastguard Worker dsts_cmp(const void *_i1, const void *_i2)
999*61046927SAndroid Build Coastguard Worker {
1000*61046927SAndroid Build Coastguard Worker struct ir3_register *i1 = *(struct ir3_register *const *) _i1;
1001*61046927SAndroid Build Coastguard Worker struct ir3_register *i2 = *(struct ir3_register *const *) _i2;
1002*61046927SAndroid Build Coastguard Worker
1003*61046927SAndroid Build Coastguard Worker /* Treat tied destinations as-if they are live-through sources, and normal
1004*61046927SAndroid Build Coastguard Worker * destinations as killed sources.
1005*61046927SAndroid Build Coastguard Worker */
1006*61046927SAndroid Build Coastguard Worker unsigned i1_align = reg_elem_size(i1);
1007*61046927SAndroid Build Coastguard Worker unsigned i2_align = reg_elem_size(i2);
1008*61046927SAndroid Build Coastguard Worker if (i1_align > i2_align)
1009*61046927SAndroid Build Coastguard Worker return 1;
1010*61046927SAndroid Build Coastguard Worker if (i1_align < i2_align)
1011*61046927SAndroid Build Coastguard Worker return -1;
1012*61046927SAndroid Build Coastguard Worker
1013*61046927SAndroid Build Coastguard Worker if (i1_align == 1) {
1014*61046927SAndroid Build Coastguard Worker if (!is_early_clobber(i2))
1015*61046927SAndroid Build Coastguard Worker return -1;
1016*61046927SAndroid Build Coastguard Worker if (!is_early_clobber(i1))
1017*61046927SAndroid Build Coastguard Worker return 1;
1018*61046927SAndroid Build Coastguard Worker } else {
1019*61046927SAndroid Build Coastguard Worker if (!is_early_clobber(i2))
1020*61046927SAndroid Build Coastguard Worker return 1;
1021*61046927SAndroid Build Coastguard Worker if (!is_early_clobber(i1))
1022*61046927SAndroid Build Coastguard Worker return -1;
1023*61046927SAndroid Build Coastguard Worker }
1024*61046927SAndroid Build Coastguard Worker
1025*61046927SAndroid Build Coastguard Worker return 0;
1026*61046927SAndroid Build Coastguard Worker }
1027*61046927SAndroid Build Coastguard Worker
1028*61046927SAndroid Build Coastguard Worker /* "Compress" all the live intervals so that there is enough space for the
1029*61046927SAndroid Build Coastguard Worker * destination register. As there can be gaps when a more-aligned interval
1030*61046927SAndroid Build Coastguard Worker * follows a less-aligned interval, this also sorts them to remove such
1031*61046927SAndroid Build Coastguard Worker * "padding", which may be required when space is very tight. This isn't
1032*61046927SAndroid Build Coastguard Worker * amazing, but should be used only as a last resort in case the register file
1033*61046927SAndroid Build Coastguard Worker * is almost full and badly fragmented.
1034*61046927SAndroid Build Coastguard Worker *
1035*61046927SAndroid Build Coastguard Worker * Return the physreg to use.
1036*61046927SAndroid Build Coastguard Worker */
1037*61046927SAndroid Build Coastguard Worker static physreg_t
compress_regs_left(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg)1038*61046927SAndroid Build Coastguard Worker compress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
1039*61046927SAndroid Build Coastguard Worker struct ir3_register *reg)
1040*61046927SAndroid Build Coastguard Worker {
1041*61046927SAndroid Build Coastguard Worker unsigned reg_align = reg_elem_size(reg);
1042*61046927SAndroid Build Coastguard Worker DECLARE_ARRAY(struct ra_removed_interval, intervals);
1043*61046927SAndroid Build Coastguard Worker intervals_count = intervals_sz = 0;
1044*61046927SAndroid Build Coastguard Worker intervals = NULL;
1045*61046927SAndroid Build Coastguard Worker
1046*61046927SAndroid Build Coastguard Worker DECLARE_ARRAY(struct ir3_register *, dsts);
1047*61046927SAndroid Build Coastguard Worker dsts_count = dsts_sz = 0;
1048*61046927SAndroid Build Coastguard Worker dsts = NULL;
1049*61046927SAndroid Build Coastguard Worker array_insert(ctx, dsts, reg);
1050*61046927SAndroid Build Coastguard Worker bool dst_inserted[reg->instr->dsts_count];
1051*61046927SAndroid Build Coastguard Worker
1052*61046927SAndroid Build Coastguard Worker unsigned dst_size = reg->tied ? 0 : reg_size(reg);
1053*61046927SAndroid Build Coastguard Worker unsigned ec_dst_size = is_early_clobber(reg) ? reg_size(reg) : 0;
1054*61046927SAndroid Build Coastguard Worker unsigned half_dst_size = 0, ec_half_dst_size = 0;
1055*61046927SAndroid Build Coastguard Worker if (reg_align == 1) {
1056*61046927SAndroid Build Coastguard Worker half_dst_size = dst_size;
1057*61046927SAndroid Build Coastguard Worker ec_half_dst_size = ec_dst_size;
1058*61046927SAndroid Build Coastguard Worker }
1059*61046927SAndroid Build Coastguard Worker
1060*61046927SAndroid Build Coastguard Worker unsigned removed_size = 0, removed_half_size = 0;
1061*61046927SAndroid Build Coastguard Worker unsigned removed_killed_size = 0, removed_killed_half_size = 0;
1062*61046927SAndroid Build Coastguard Worker unsigned file_size = reg_file_size(file, reg);
1063*61046927SAndroid Build Coastguard Worker physreg_t start_reg = 0;
1064*61046927SAndroid Build Coastguard Worker
1065*61046927SAndroid Build Coastguard Worker foreach_interval_rev_safe (interval, file) {
1066*61046927SAndroid Build Coastguard Worker /* We'll check if we can compact the intervals starting here. */
1067*61046927SAndroid Build Coastguard Worker physreg_t candidate_start = interval->physreg_end;
1068*61046927SAndroid Build Coastguard Worker
1069*61046927SAndroid Build Coastguard Worker /* Check if there are any other destinations we need to compact. */
1070*61046927SAndroid Build Coastguard Worker ra_foreach_dst_n (other_dst, n, reg->instr) {
1071*61046927SAndroid Build Coastguard Worker if (other_dst == reg)
1072*61046927SAndroid Build Coastguard Worker break;
1073*61046927SAndroid Build Coastguard Worker if (ra_get_file(ctx, other_dst) != file)
1074*61046927SAndroid Build Coastguard Worker continue;
1075*61046927SAndroid Build Coastguard Worker if (dst_inserted[n])
1076*61046927SAndroid Build Coastguard Worker continue;
1077*61046927SAndroid Build Coastguard Worker
1078*61046927SAndroid Build Coastguard Worker struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
1079*61046927SAndroid Build Coastguard Worker /* if the destination partially overlaps this interval, we need to
1080*61046927SAndroid Build Coastguard Worker * extend candidate_start to the end.
1081*61046927SAndroid Build Coastguard Worker */
1082*61046927SAndroid Build Coastguard Worker if (other_interval->physreg_start < candidate_start) {
1083*61046927SAndroid Build Coastguard Worker candidate_start = MAX2(candidate_start,
1084*61046927SAndroid Build Coastguard Worker other_interval->physreg_end);
1085*61046927SAndroid Build Coastguard Worker continue;
1086*61046927SAndroid Build Coastguard Worker }
1087*61046927SAndroid Build Coastguard Worker
1088*61046927SAndroid Build Coastguard Worker dst_inserted[n] = true;
1089*61046927SAndroid Build Coastguard Worker
1090*61046927SAndroid Build Coastguard Worker /* dst intervals with a tied killed source are considered attached to
1091*61046927SAndroid Build Coastguard Worker * that source. Don't actually insert them. This means we have to
1092*61046927SAndroid Build Coastguard Worker * update them below if their tied source moves.
1093*61046927SAndroid Build Coastguard Worker */
1094*61046927SAndroid Build Coastguard Worker if (other_dst->tied) {
1095*61046927SAndroid Build Coastguard Worker struct ra_interval *tied_interval =
1096*61046927SAndroid Build Coastguard Worker &ctx->intervals[other_dst->tied->def->name];
1097*61046927SAndroid Build Coastguard Worker if (tied_interval->is_killed)
1098*61046927SAndroid Build Coastguard Worker continue;
1099*61046927SAndroid Build Coastguard Worker }
1100*61046927SAndroid Build Coastguard Worker
1101*61046927SAndroid Build Coastguard Worker d("popping destination %u physreg %u\n",
1102*61046927SAndroid Build Coastguard Worker other_interval->interval.reg->name,
1103*61046927SAndroid Build Coastguard Worker other_interval->physreg_start);
1104*61046927SAndroid Build Coastguard Worker
1105*61046927SAndroid Build Coastguard Worker array_insert(ctx, dsts, other_dst);
1106*61046927SAndroid Build Coastguard Worker unsigned interval_size = reg_size(other_dst);
1107*61046927SAndroid Build Coastguard Worker if (is_early_clobber(other_dst)) {
1108*61046927SAndroid Build Coastguard Worker ec_dst_size += interval_size;
1109*61046927SAndroid Build Coastguard Worker if (other_interval->interval.reg->flags & IR3_REG_HALF)
1110*61046927SAndroid Build Coastguard Worker ec_half_dst_size += interval_size;
1111*61046927SAndroid Build Coastguard Worker } else {
1112*61046927SAndroid Build Coastguard Worker dst_size += interval_size;
1113*61046927SAndroid Build Coastguard Worker if (other_interval->interval.reg->flags & IR3_REG_HALF)
1114*61046927SAndroid Build Coastguard Worker half_dst_size += interval_size;
1115*61046927SAndroid Build Coastguard Worker }
1116*61046927SAndroid Build Coastguard Worker }
1117*61046927SAndroid Build Coastguard Worker
1118*61046927SAndroid Build Coastguard Worker /* Check if we can sort the intervals *after* this one and have enough
1119*61046927SAndroid Build Coastguard Worker * space leftover to accomodate all intervals, keeping in mind that killed
1120*61046927SAndroid Build Coastguard Worker * sources overlap non-tied destinations. Also check that we have enough
1121*61046927SAndroid Build Coastguard Worker * space leftover for half-registers, if we're inserting a half-register
1122*61046927SAndroid Build Coastguard Worker * (otherwise we only shift any half-registers down so they should be
1123*61046927SAndroid Build Coastguard Worker * safe).
1124*61046927SAndroid Build Coastguard Worker */
1125*61046927SAndroid Build Coastguard Worker if (candidate_start + removed_size + ec_dst_size +
1126*61046927SAndroid Build Coastguard Worker MAX2(removed_killed_size, dst_size) <= file->size &&
1127*61046927SAndroid Build Coastguard Worker (reg_align != 1 ||
1128*61046927SAndroid Build Coastguard Worker candidate_start + removed_half_size + ec_half_dst_size +
1129*61046927SAndroid Build Coastguard Worker MAX2(removed_killed_half_size, half_dst_size) <= file_size)) {
1130*61046927SAndroid Build Coastguard Worker start_reg = candidate_start;
1131*61046927SAndroid Build Coastguard Worker break;
1132*61046927SAndroid Build Coastguard Worker }
1133*61046927SAndroid Build Coastguard Worker
1134*61046927SAndroid Build Coastguard Worker /* We assume that all frozen intervals are at the start and that we
1135*61046927SAndroid Build Coastguard Worker * can avoid popping them.
1136*61046927SAndroid Build Coastguard Worker */
1137*61046927SAndroid Build Coastguard Worker assert(!interval->frozen);
1138*61046927SAndroid Build Coastguard Worker
1139*61046927SAndroid Build Coastguard Worker /* Killed sources are different because they go at the end and can
1140*61046927SAndroid Build Coastguard Worker * overlap the register we're trying to add.
1141*61046927SAndroid Build Coastguard Worker */
1142*61046927SAndroid Build Coastguard Worker unsigned interval_size = interval->physreg_end - interval->physreg_start;
1143*61046927SAndroid Build Coastguard Worker if (interval->is_killed) {
1144*61046927SAndroid Build Coastguard Worker removed_killed_size += interval_size;
1145*61046927SAndroid Build Coastguard Worker if (interval->interval.reg->flags & IR3_REG_HALF)
1146*61046927SAndroid Build Coastguard Worker removed_killed_half_size += interval_size;
1147*61046927SAndroid Build Coastguard Worker } else {
1148*61046927SAndroid Build Coastguard Worker removed_size += interval_size;
1149*61046927SAndroid Build Coastguard Worker if (interval->interval.reg->flags & IR3_REG_HALF)
1150*61046927SAndroid Build Coastguard Worker removed_half_size += interval_size;
1151*61046927SAndroid Build Coastguard Worker }
1152*61046927SAndroid Build Coastguard Worker
1153*61046927SAndroid Build Coastguard Worker /* Now that we've done the accounting, pop this off */
1154*61046927SAndroid Build Coastguard Worker d("popping interval %u physreg %u%s\n", interval->interval.reg->name,
1155*61046927SAndroid Build Coastguard Worker interval->physreg_start, interval->is_killed ? ", killed" : "");
1156*61046927SAndroid Build Coastguard Worker array_insert(ctx, intervals, ra_pop_interval(ctx, file, interval));
1157*61046927SAndroid Build Coastguard Worker }
1158*61046927SAndroid Build Coastguard Worker
1159*61046927SAndroid Build Coastguard Worker /* TODO: In addition to skipping registers at the beginning that are
1160*61046927SAndroid Build Coastguard Worker * well-packed, we should try to skip registers at the end.
1161*61046927SAndroid Build Coastguard Worker */
1162*61046927SAndroid Build Coastguard Worker
1163*61046927SAndroid Build Coastguard Worker qsort(intervals, intervals_count, sizeof(*intervals), removed_interval_cmp);
1164*61046927SAndroid Build Coastguard Worker qsort(dsts, dsts_count, sizeof(*dsts), dsts_cmp);
1165*61046927SAndroid Build Coastguard Worker
1166*61046927SAndroid Build Coastguard Worker physreg_t live_reg = start_reg;
1167*61046927SAndroid Build Coastguard Worker physreg_t dst_reg = (physreg_t)~0;
1168*61046927SAndroid Build Coastguard Worker physreg_t ret_reg = (physreg_t)~0;
1169*61046927SAndroid Build Coastguard Worker unsigned dst_index = 0;
1170*61046927SAndroid Build Coastguard Worker unsigned live_index = 0;
1171*61046927SAndroid Build Coastguard Worker
1172*61046927SAndroid Build Coastguard Worker /* We have two lists of intervals to process, live intervals and destination
1173*61046927SAndroid Build Coastguard Worker * intervals. Process them in the order of the disgram in insert_cmp().
1174*61046927SAndroid Build Coastguard Worker */
1175*61046927SAndroid Build Coastguard Worker while (live_index < intervals_count || dst_index < dsts_count) {
1176*61046927SAndroid Build Coastguard Worker bool process_dst;
1177*61046927SAndroid Build Coastguard Worker if (live_index == intervals_count) {
1178*61046927SAndroid Build Coastguard Worker process_dst = true;
1179*61046927SAndroid Build Coastguard Worker } else if (dst_index == dsts_count) {
1180*61046927SAndroid Build Coastguard Worker process_dst = false;
1181*61046927SAndroid Build Coastguard Worker } else {
1182*61046927SAndroid Build Coastguard Worker struct ir3_register *dst = dsts[dst_index];
1183*61046927SAndroid Build Coastguard Worker struct ra_interval *live_interval = intervals[live_index].interval;
1184*61046927SAndroid Build Coastguard Worker
1185*61046927SAndroid Build Coastguard Worker bool live_half = live_interval->interval.reg->flags & IR3_REG_HALF;
1186*61046927SAndroid Build Coastguard Worker bool live_killed = live_interval->is_killed;
1187*61046927SAndroid Build Coastguard Worker bool dst_half = dst->flags & IR3_REG_HALF;
1188*61046927SAndroid Build Coastguard Worker bool dst_early_clobber = is_early_clobber(dst);
1189*61046927SAndroid Build Coastguard Worker
1190*61046927SAndroid Build Coastguard Worker if (live_half && !live_killed) {
1191*61046927SAndroid Build Coastguard Worker /* far-left of diagram. */
1192*61046927SAndroid Build Coastguard Worker process_dst = false;
1193*61046927SAndroid Build Coastguard Worker } else if (dst_half && dst_early_clobber) {
1194*61046927SAndroid Build Coastguard Worker /* mid-left of diagram. */
1195*61046927SAndroid Build Coastguard Worker process_dst = true;
1196*61046927SAndroid Build Coastguard Worker } else if (!dst_early_clobber) {
1197*61046927SAndroid Build Coastguard Worker /* bottom of disagram. */
1198*61046927SAndroid Build Coastguard Worker process_dst = true;
1199*61046927SAndroid Build Coastguard Worker } else if (live_killed) {
1200*61046927SAndroid Build Coastguard Worker /* middle of diagram. */
1201*61046927SAndroid Build Coastguard Worker process_dst = false;
1202*61046927SAndroid Build Coastguard Worker } else if (!dst_half && dst_early_clobber) {
1203*61046927SAndroid Build Coastguard Worker /* mid-right of diagram. */
1204*61046927SAndroid Build Coastguard Worker process_dst = true;
1205*61046927SAndroid Build Coastguard Worker } else {
1206*61046927SAndroid Build Coastguard Worker /* far right of diagram. */
1207*61046927SAndroid Build Coastguard Worker assert(!live_killed && !live_half);
1208*61046927SAndroid Build Coastguard Worker process_dst = false;
1209*61046927SAndroid Build Coastguard Worker }
1210*61046927SAndroid Build Coastguard Worker }
1211*61046927SAndroid Build Coastguard Worker
1212*61046927SAndroid Build Coastguard Worker struct ir3_register *cur_reg =
1213*61046927SAndroid Build Coastguard Worker process_dst ? dsts[dst_index] :
1214*61046927SAndroid Build Coastguard Worker intervals[live_index].interval->interval.reg;
1215*61046927SAndroid Build Coastguard Worker
1216*61046927SAndroid Build Coastguard Worker physreg_t physreg;
1217*61046927SAndroid Build Coastguard Worker if (process_dst && !is_early_clobber(cur_reg)) {
1218*61046927SAndroid Build Coastguard Worker if (dst_reg == (physreg_t)~0)
1219*61046927SAndroid Build Coastguard Worker dst_reg = live_reg;
1220*61046927SAndroid Build Coastguard Worker physreg = dst_reg;
1221*61046927SAndroid Build Coastguard Worker } else {
1222*61046927SAndroid Build Coastguard Worker physreg = live_reg;
1223*61046927SAndroid Build Coastguard Worker struct ra_interval *live_interval = intervals[live_index].interval;
1224*61046927SAndroid Build Coastguard Worker bool live_killed = live_interval->is_killed;
1225*61046927SAndroid Build Coastguard Worker /* If this is live-through and we've processed the destinations, we
1226*61046927SAndroid Build Coastguard Worker * need to make sure we take into account any overlapping destinations.
1227*61046927SAndroid Build Coastguard Worker */
1228*61046927SAndroid Build Coastguard Worker if (!live_killed && dst_reg != (physreg_t)~0)
1229*61046927SAndroid Build Coastguard Worker physreg = MAX2(physreg, dst_reg);
1230*61046927SAndroid Build Coastguard Worker }
1231*61046927SAndroid Build Coastguard Worker
1232*61046927SAndroid Build Coastguard Worker if (!(cur_reg->flags & IR3_REG_HALF))
1233*61046927SAndroid Build Coastguard Worker physreg = ALIGN(physreg, 2);
1234*61046927SAndroid Build Coastguard Worker
1235*61046927SAndroid Build Coastguard Worker d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
1236*61046927SAndroid Build Coastguard Worker
1237*61046927SAndroid Build Coastguard Worker unsigned interval_size = reg_size(cur_reg);
1238*61046927SAndroid Build Coastguard Worker if (physreg + interval_size >
1239*61046927SAndroid Build Coastguard Worker reg_file_size(file, cur_reg)) {
1240*61046927SAndroid Build Coastguard Worker d("ran out of room for interval %u!\n",
1241*61046927SAndroid Build Coastguard Worker cur_reg->name);
1242*61046927SAndroid Build Coastguard Worker unreachable("reg pressure calculation was wrong!");
1243*61046927SAndroid Build Coastguard Worker return 0;
1244*61046927SAndroid Build Coastguard Worker }
1245*61046927SAndroid Build Coastguard Worker
1246*61046927SAndroid Build Coastguard Worker if (process_dst) {
1247*61046927SAndroid Build Coastguard Worker if (cur_reg == reg) {
1248*61046927SAndroid Build Coastguard Worker ret_reg = physreg;
1249*61046927SAndroid Build Coastguard Worker } else {
1250*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[cur_reg->name];
1251*61046927SAndroid Build Coastguard Worker interval->physreg_start = physreg;
1252*61046927SAndroid Build Coastguard Worker interval->physreg_end = physreg + interval_size;
1253*61046927SAndroid Build Coastguard Worker }
1254*61046927SAndroid Build Coastguard Worker dst_index++;
1255*61046927SAndroid Build Coastguard Worker } else {
1256*61046927SAndroid Build Coastguard Worker ra_push_interval(ctx, file, &intervals[live_index], physreg);
1257*61046927SAndroid Build Coastguard Worker live_index++;
1258*61046927SAndroid Build Coastguard Worker }
1259*61046927SAndroid Build Coastguard Worker
1260*61046927SAndroid Build Coastguard Worker physreg += interval_size;
1261*61046927SAndroid Build Coastguard Worker
1262*61046927SAndroid Build Coastguard Worker if (process_dst && !is_early_clobber(cur_reg)) {
1263*61046927SAndroid Build Coastguard Worker dst_reg = physreg;
1264*61046927SAndroid Build Coastguard Worker } else {
1265*61046927SAndroid Build Coastguard Worker live_reg = physreg;
1266*61046927SAndroid Build Coastguard Worker }
1267*61046927SAndroid Build Coastguard Worker }
1268*61046927SAndroid Build Coastguard Worker
1269*61046927SAndroid Build Coastguard Worker /* If we shuffled around a tied source that is killed, we may have to update
1270*61046927SAndroid Build Coastguard Worker * its corresponding destination since we didn't insert it above.
1271*61046927SAndroid Build Coastguard Worker */
1272*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, reg->instr) {
1273*61046927SAndroid Build Coastguard Worker if (dst == reg)
1274*61046927SAndroid Build Coastguard Worker break;
1275*61046927SAndroid Build Coastguard Worker
1276*61046927SAndroid Build Coastguard Worker struct ir3_register *tied = dst->tied;
1277*61046927SAndroid Build Coastguard Worker if (!tied)
1278*61046927SAndroid Build Coastguard Worker continue;
1279*61046927SAndroid Build Coastguard Worker
1280*61046927SAndroid Build Coastguard Worker struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1281*61046927SAndroid Build Coastguard Worker if (!tied_interval->is_killed)
1282*61046927SAndroid Build Coastguard Worker continue;
1283*61046927SAndroid Build Coastguard Worker
1284*61046927SAndroid Build Coastguard Worker struct ra_interval *dst_interval = &ctx->intervals[dst->name];
1285*61046927SAndroid Build Coastguard Worker unsigned dst_size = reg_size(dst);
1286*61046927SAndroid Build Coastguard Worker dst_interval->physreg_start = ra_interval_get_physreg(tied_interval);
1287*61046927SAndroid Build Coastguard Worker dst_interval->physreg_end = dst_interval->physreg_start + dst_size;
1288*61046927SAndroid Build Coastguard Worker }
1289*61046927SAndroid Build Coastguard Worker
1290*61046927SAndroid Build Coastguard Worker return ret_reg;
1291*61046927SAndroid Build Coastguard Worker }
1292*61046927SAndroid Build Coastguard Worker
1293*61046927SAndroid Build Coastguard Worker void
ra_update_affinity(unsigned file_size,struct ir3_register * reg,physreg_t physreg)1294*61046927SAndroid Build Coastguard Worker ra_update_affinity(unsigned file_size, struct ir3_register *reg,
1295*61046927SAndroid Build Coastguard Worker physreg_t physreg)
1296*61046927SAndroid Build Coastguard Worker {
1297*61046927SAndroid Build Coastguard Worker if (!reg->merge_set || reg->merge_set->preferred_reg != (physreg_t)~0)
1298*61046927SAndroid Build Coastguard Worker return;
1299*61046927SAndroid Build Coastguard Worker
1300*61046927SAndroid Build Coastguard Worker if (physreg < reg->merge_set_offset)
1301*61046927SAndroid Build Coastguard Worker return;
1302*61046927SAndroid Build Coastguard Worker
1303*61046927SAndroid Build Coastguard Worker if ((physreg - reg->merge_set_offset + reg->merge_set->size) > file_size)
1304*61046927SAndroid Build Coastguard Worker return;
1305*61046927SAndroid Build Coastguard Worker
1306*61046927SAndroid Build Coastguard Worker reg->merge_set->preferred_reg = physreg - reg->merge_set_offset;
1307*61046927SAndroid Build Coastguard Worker }
1308*61046927SAndroid Build Coastguard Worker
1309*61046927SAndroid Build Coastguard Worker /* Try to find free space for a register without shuffling anything. This uses
1310*61046927SAndroid Build Coastguard Worker * a round-robin algorithm to reduce false dependencies.
1311*61046927SAndroid Build Coastguard Worker */
1312*61046927SAndroid Build Coastguard Worker static physreg_t
find_best_gap(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * dst,unsigned file_size,unsigned size,unsigned alignment)1313*61046927SAndroid Build Coastguard Worker find_best_gap(struct ra_ctx *ctx, struct ra_file *file,
1314*61046927SAndroid Build Coastguard Worker struct ir3_register *dst, unsigned file_size, unsigned size,
1315*61046927SAndroid Build Coastguard Worker unsigned alignment)
1316*61046927SAndroid Build Coastguard Worker {
1317*61046927SAndroid Build Coastguard Worker /* This can happen if we create a very large merge set. Just bail out in that
1318*61046927SAndroid Build Coastguard Worker * case.
1319*61046927SAndroid Build Coastguard Worker */
1320*61046927SAndroid Build Coastguard Worker if (size > file_size)
1321*61046927SAndroid Build Coastguard Worker return (physreg_t) ~0;
1322*61046927SAndroid Build Coastguard Worker
1323*61046927SAndroid Build Coastguard Worker BITSET_WORD *available =
1324*61046927SAndroid Build Coastguard Worker is_early_clobber(dst) ? file->available_to_evict : file->available;
1325*61046927SAndroid Build Coastguard Worker
1326*61046927SAndroid Build Coastguard Worker unsigned start = ALIGN(file->start, alignment) % (file_size - size + alignment);
1327*61046927SAndroid Build Coastguard Worker unsigned candidate = start;
1328*61046927SAndroid Build Coastguard Worker do {
1329*61046927SAndroid Build Coastguard Worker bool is_available = true;
1330*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < size; i++) {
1331*61046927SAndroid Build Coastguard Worker if (!BITSET_TEST(available, candidate + i)) {
1332*61046927SAndroid Build Coastguard Worker is_available = false;
1333*61046927SAndroid Build Coastguard Worker break;
1334*61046927SAndroid Build Coastguard Worker }
1335*61046927SAndroid Build Coastguard Worker }
1336*61046927SAndroid Build Coastguard Worker
1337*61046927SAndroid Build Coastguard Worker if (is_available) {
1338*61046927SAndroid Build Coastguard Worker is_available =
1339*61046927SAndroid Build Coastguard Worker !check_dst_overlap(ctx, file, dst, candidate, candidate + size);
1340*61046927SAndroid Build Coastguard Worker }
1341*61046927SAndroid Build Coastguard Worker
1342*61046927SAndroid Build Coastguard Worker if (is_available) {
1343*61046927SAndroid Build Coastguard Worker file->start = (candidate + size) % file_size;
1344*61046927SAndroid Build Coastguard Worker return candidate;
1345*61046927SAndroid Build Coastguard Worker }
1346*61046927SAndroid Build Coastguard Worker
1347*61046927SAndroid Build Coastguard Worker candidate += alignment;
1348*61046927SAndroid Build Coastguard Worker if (candidate + size > file_size)
1349*61046927SAndroid Build Coastguard Worker candidate = 0;
1350*61046927SAndroid Build Coastguard Worker } while (candidate != start);
1351*61046927SAndroid Build Coastguard Worker
1352*61046927SAndroid Build Coastguard Worker return (physreg_t)~0;
1353*61046927SAndroid Build Coastguard Worker }
1354*61046927SAndroid Build Coastguard Worker
1355*61046927SAndroid Build Coastguard Worker static physreg_t
try_allocate_src(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg)1356*61046927SAndroid Build Coastguard Worker try_allocate_src(struct ra_ctx *ctx, struct ra_file *file,
1357*61046927SAndroid Build Coastguard Worker struct ir3_register *reg)
1358*61046927SAndroid Build Coastguard Worker {
1359*61046927SAndroid Build Coastguard Worker unsigned file_size = reg_file_size(file, reg);
1360*61046927SAndroid Build Coastguard Worker unsigned size = reg_size(reg);
1361*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < reg->instr->srcs_count; i++) {
1362*61046927SAndroid Build Coastguard Worker struct ir3_register *src = reg->instr->srcs[i];
1363*61046927SAndroid Build Coastguard Worker if (!ra_reg_is_src(src))
1364*61046927SAndroid Build Coastguard Worker continue;
1365*61046927SAndroid Build Coastguard Worker if (ra_get_file(ctx, src) == file && reg_size(src) >= size) {
1366*61046927SAndroid Build Coastguard Worker struct ra_interval *src_interval = &ctx->intervals[src->def->name];
1367*61046927SAndroid Build Coastguard Worker physreg_t src_physreg = ra_interval_get_physreg(src_interval);
1368*61046927SAndroid Build Coastguard Worker if (src_physreg % reg_elem_size(reg) == 0 &&
1369*61046927SAndroid Build Coastguard Worker src_physreg + size <= file_size &&
1370*61046927SAndroid Build Coastguard Worker get_reg_specified(ctx, file, reg, src_physreg, false))
1371*61046927SAndroid Build Coastguard Worker return src_physreg;
1372*61046927SAndroid Build Coastguard Worker }
1373*61046927SAndroid Build Coastguard Worker }
1374*61046927SAndroid Build Coastguard Worker
1375*61046927SAndroid Build Coastguard Worker return ~0;
1376*61046927SAndroid Build Coastguard Worker }
1377*61046927SAndroid Build Coastguard Worker
1378*61046927SAndroid Build Coastguard Worker static bool
rpt_has_unique_merge_set(struct ir3_instruction * instr)1379*61046927SAndroid Build Coastguard Worker rpt_has_unique_merge_set(struct ir3_instruction *instr)
1380*61046927SAndroid Build Coastguard Worker {
1381*61046927SAndroid Build Coastguard Worker assert(ir3_instr_is_rpt(instr));
1382*61046927SAndroid Build Coastguard Worker
1383*61046927SAndroid Build Coastguard Worker if (!instr->dsts[0]->merge_set)
1384*61046927SAndroid Build Coastguard Worker return false;
1385*61046927SAndroid Build Coastguard Worker
1386*61046927SAndroid Build Coastguard Worker struct ir3_instruction *first = ir3_instr_first_rpt(instr);
1387*61046927SAndroid Build Coastguard Worker struct ir3_register *def = first->dsts[0];
1388*61046927SAndroid Build Coastguard Worker
1389*61046927SAndroid Build Coastguard Worker if (def->merge_set != instr->dsts[0]->merge_set ||
1390*61046927SAndroid Build Coastguard Worker def->merge_set->regs_count != ir3_instr_rpt_length(first)) {
1391*61046927SAndroid Build Coastguard Worker return false;
1392*61046927SAndroid Build Coastguard Worker }
1393*61046927SAndroid Build Coastguard Worker
1394*61046927SAndroid Build Coastguard Worker unsigned i = 0;
1395*61046927SAndroid Build Coastguard Worker
1396*61046927SAndroid Build Coastguard Worker foreach_instr_rpt (rpt, first) {
1397*61046927SAndroid Build Coastguard Worker if (rpt->dsts[0] != def->merge_set->regs[i++])
1398*61046927SAndroid Build Coastguard Worker return false;
1399*61046927SAndroid Build Coastguard Worker }
1400*61046927SAndroid Build Coastguard Worker
1401*61046927SAndroid Build Coastguard Worker return true;
1402*61046927SAndroid Build Coastguard Worker }
1403*61046927SAndroid Build Coastguard Worker
1404*61046927SAndroid Build Coastguard Worker /* This is the main entrypoint for picking a register. Pick a free register
1405*61046927SAndroid Build Coastguard Worker * for "reg", shuffling around sources if necessary. In the normal case where
1406*61046927SAndroid Build Coastguard Worker * "is_source" is false, this register can overlap with killed sources
1407*61046927SAndroid Build Coastguard Worker * (intervals with "is_killed == true"). If "is_source" is true, then
1408*61046927SAndroid Build Coastguard Worker * is_killed is ignored and the register returned must not overlap with killed
1409*61046927SAndroid Build Coastguard Worker * sources. This must be used for tied registers, because we're actually
1410*61046927SAndroid Build Coastguard Worker * allocating the destination and the tied source at the same time.
1411*61046927SAndroid Build Coastguard Worker */
1412*61046927SAndroid Build Coastguard Worker
1413*61046927SAndroid Build Coastguard Worker static physreg_t
get_reg(struct ra_ctx * ctx,struct ra_file * file,struct ir3_register * reg)1414*61046927SAndroid Build Coastguard Worker get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg)
1415*61046927SAndroid Build Coastguard Worker {
1416*61046927SAndroid Build Coastguard Worker unsigned file_size = reg_file_size(file, reg);
1417*61046927SAndroid Build Coastguard Worker if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) {
1418*61046927SAndroid Build Coastguard Worker physreg_t preferred_reg =
1419*61046927SAndroid Build Coastguard Worker reg->merge_set->preferred_reg + reg->merge_set_offset;
1420*61046927SAndroid Build Coastguard Worker if (preferred_reg + reg_size(reg) <= file_size &&
1421*61046927SAndroid Build Coastguard Worker preferred_reg % reg_elem_size(reg) == 0 &&
1422*61046927SAndroid Build Coastguard Worker get_reg_specified(ctx, file, reg, preferred_reg, false))
1423*61046927SAndroid Build Coastguard Worker return preferred_reg;
1424*61046927SAndroid Build Coastguard Worker }
1425*61046927SAndroid Build Coastguard Worker
1426*61046927SAndroid Build Coastguard Worker /* For repeated instructions whose merge set is unique (i.e., only used for
1427*61046927SAndroid Build Coastguard Worker * these repeated instructions), try to first allocate one of their sources
1428*61046927SAndroid Build Coastguard Worker * (for the same reason as for ALU/SFU instructions explained below). This
1429*61046927SAndroid Build Coastguard Worker * also prevents us from allocating a new register range for this merge set
1430*61046927SAndroid Build Coastguard Worker * when the one from a source could be reused.
1431*61046927SAndroid Build Coastguard Worker */
1432*61046927SAndroid Build Coastguard Worker if (ir3_instr_is_rpt(reg->instr) && rpt_has_unique_merge_set(reg->instr)) {
1433*61046927SAndroid Build Coastguard Worker physreg_t src_reg = try_allocate_src(ctx, file, reg);
1434*61046927SAndroid Build Coastguard Worker if (src_reg != (physreg_t)~0)
1435*61046927SAndroid Build Coastguard Worker return src_reg;
1436*61046927SAndroid Build Coastguard Worker }
1437*61046927SAndroid Build Coastguard Worker
1438*61046927SAndroid Build Coastguard Worker /* If this register is a subset of a merge set which we have not picked a
1439*61046927SAndroid Build Coastguard Worker * register for, first try to allocate enough space for the entire merge
1440*61046927SAndroid Build Coastguard Worker * set.
1441*61046927SAndroid Build Coastguard Worker */
1442*61046927SAndroid Build Coastguard Worker unsigned size = reg_size(reg);
1443*61046927SAndroid Build Coastguard Worker if (reg->merge_set && reg->merge_set->preferred_reg == (physreg_t)~0 &&
1444*61046927SAndroid Build Coastguard Worker size < reg->merge_set->size) {
1445*61046927SAndroid Build Coastguard Worker physreg_t best_reg = find_best_gap(ctx, file, reg, file_size,
1446*61046927SAndroid Build Coastguard Worker reg->merge_set->size,
1447*61046927SAndroid Build Coastguard Worker reg->merge_set->alignment);
1448*61046927SAndroid Build Coastguard Worker if (best_reg != (physreg_t)~0u) {
1449*61046927SAndroid Build Coastguard Worker best_reg += reg->merge_set_offset;
1450*61046927SAndroid Build Coastguard Worker return best_reg;
1451*61046927SAndroid Build Coastguard Worker }
1452*61046927SAndroid Build Coastguard Worker }
1453*61046927SAndroid Build Coastguard Worker
1454*61046927SAndroid Build Coastguard Worker /* For ALU and SFU instructions, if the src reg is avail to pick, use it.
1455*61046927SAndroid Build Coastguard Worker * Because this doesn't introduce unnecessary dependencies, and it
1456*61046927SAndroid Build Coastguard Worker * potentially avoids needing (ss) syncs for write after read hazards for
1457*61046927SAndroid Build Coastguard Worker * SFU instructions:
1458*61046927SAndroid Build Coastguard Worker */
1459*61046927SAndroid Build Coastguard Worker if (is_sfu(reg->instr) || is_alu(reg->instr)) {
1460*61046927SAndroid Build Coastguard Worker physreg_t src_reg = try_allocate_src(ctx, file, reg);
1461*61046927SAndroid Build Coastguard Worker if (src_reg != (physreg_t)~0)
1462*61046927SAndroid Build Coastguard Worker return src_reg;
1463*61046927SAndroid Build Coastguard Worker }
1464*61046927SAndroid Build Coastguard Worker
1465*61046927SAndroid Build Coastguard Worker physreg_t best_reg =
1466*61046927SAndroid Build Coastguard Worker find_best_gap(ctx, file, reg, file_size, size, reg_elem_size(reg));
1467*61046927SAndroid Build Coastguard Worker if (best_reg != (physreg_t)~0u) {
1468*61046927SAndroid Build Coastguard Worker return best_reg;
1469*61046927SAndroid Build Coastguard Worker }
1470*61046927SAndroid Build Coastguard Worker
1471*61046927SAndroid Build Coastguard Worker /* Ok, we couldn't find anything that fits. Here is where we have to start
1472*61046927SAndroid Build Coastguard Worker * moving things around to make stuff fit. First try solely evicting
1473*61046927SAndroid Build Coastguard Worker * registers in the way.
1474*61046927SAndroid Build Coastguard Worker */
1475*61046927SAndroid Build Coastguard Worker unsigned best_eviction_count = ~0;
1476*61046927SAndroid Build Coastguard Worker for (physreg_t i = 0; i + size <= file_size; i += reg_elem_size(reg)) {
1477*61046927SAndroid Build Coastguard Worker unsigned eviction_count;
1478*61046927SAndroid Build Coastguard Worker if (try_evict_regs(ctx, file, reg, i, &eviction_count, false, true)) {
1479*61046927SAndroid Build Coastguard Worker if (eviction_count < best_eviction_count) {
1480*61046927SAndroid Build Coastguard Worker best_eviction_count = eviction_count;
1481*61046927SAndroid Build Coastguard Worker best_reg = i;
1482*61046927SAndroid Build Coastguard Worker }
1483*61046927SAndroid Build Coastguard Worker }
1484*61046927SAndroid Build Coastguard Worker }
1485*61046927SAndroid Build Coastguard Worker
1486*61046927SAndroid Build Coastguard Worker if (best_eviction_count != ~0) {
1487*61046927SAndroid Build Coastguard Worker ASSERTED bool result = try_evict_regs(
1488*61046927SAndroid Build Coastguard Worker ctx, file, reg, best_reg, &best_eviction_count, false, false);
1489*61046927SAndroid Build Coastguard Worker assert(result);
1490*61046927SAndroid Build Coastguard Worker return best_reg;
1491*61046927SAndroid Build Coastguard Worker }
1492*61046927SAndroid Build Coastguard Worker
1493*61046927SAndroid Build Coastguard Worker /* Use the dumb fallback only if try_evict_regs() fails. */
1494*61046927SAndroid Build Coastguard Worker return compress_regs_left(ctx, file, reg);
1495*61046927SAndroid Build Coastguard Worker }
1496*61046927SAndroid Build Coastguard Worker
1497*61046927SAndroid Build Coastguard Worker static void
assign_reg(struct ir3_instruction * instr,struct ir3_register * reg,unsigned num)1498*61046927SAndroid Build Coastguard Worker assign_reg(struct ir3_instruction *instr, struct ir3_register *reg,
1499*61046927SAndroid Build Coastguard Worker unsigned num)
1500*61046927SAndroid Build Coastguard Worker {
1501*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_ARRAY) {
1502*61046927SAndroid Build Coastguard Worker reg->array.base = num;
1503*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_RELATIV)
1504*61046927SAndroid Build Coastguard Worker reg->array.offset += num;
1505*61046927SAndroid Build Coastguard Worker else
1506*61046927SAndroid Build Coastguard Worker reg->num = num + reg->array.offset;
1507*61046927SAndroid Build Coastguard Worker } else {
1508*61046927SAndroid Build Coastguard Worker reg->num = num;
1509*61046927SAndroid Build Coastguard Worker }
1510*61046927SAndroid Build Coastguard Worker }
1511*61046927SAndroid Build Coastguard Worker
1512*61046927SAndroid Build Coastguard Worker static void
mark_src_killed(struct ra_ctx * ctx,struct ir3_register * src)1513*61046927SAndroid Build Coastguard Worker mark_src_killed(struct ra_ctx *ctx, struct ir3_register *src)
1514*61046927SAndroid Build Coastguard Worker {
1515*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[src->def->name];
1516*61046927SAndroid Build Coastguard Worker
1517*61046927SAndroid Build Coastguard Worker if (!(src->flags & IR3_REG_FIRST_KILL) || interval->is_killed ||
1518*61046927SAndroid Build Coastguard Worker interval->interval.parent ||
1519*61046927SAndroid Build Coastguard Worker !rb_tree_is_empty(&interval->interval.children))
1520*61046927SAndroid Build Coastguard Worker return;
1521*61046927SAndroid Build Coastguard Worker
1522*61046927SAndroid Build Coastguard Worker ra_file_mark_killed(ra_get_file(ctx, src), interval);
1523*61046927SAndroid Build Coastguard Worker }
1524*61046927SAndroid Build Coastguard Worker
1525*61046927SAndroid Build Coastguard Worker static void
insert_dst(struct ra_ctx * ctx,struct ir3_register * dst)1526*61046927SAndroid Build Coastguard Worker insert_dst(struct ra_ctx *ctx, struct ir3_register *dst)
1527*61046927SAndroid Build Coastguard Worker {
1528*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, dst);
1529*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[dst->name];
1530*61046927SAndroid Build Coastguard Worker
1531*61046927SAndroid Build Coastguard Worker d("insert dst %u physreg %u", dst->name, ra_interval_get_physreg(interval));
1532*61046927SAndroid Build Coastguard Worker
1533*61046927SAndroid Build Coastguard Worker if (!(dst->flags & IR3_REG_UNUSED))
1534*61046927SAndroid Build Coastguard Worker ra_file_insert(file, interval);
1535*61046927SAndroid Build Coastguard Worker
1536*61046927SAndroid Build Coastguard Worker assign_reg(dst->instr, dst, ra_interval_get_num(interval));
1537*61046927SAndroid Build Coastguard Worker }
1538*61046927SAndroid Build Coastguard Worker
1539*61046927SAndroid Build Coastguard Worker static void
allocate_dst_fixed(struct ra_ctx * ctx,struct ir3_register * dst,physreg_t physreg)1540*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(struct ra_ctx *ctx, struct ir3_register *dst,
1541*61046927SAndroid Build Coastguard Worker physreg_t physreg)
1542*61046927SAndroid Build Coastguard Worker {
1543*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, dst);
1544*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[dst->name];
1545*61046927SAndroid Build Coastguard Worker ra_update_affinity(file->size, dst, physreg);
1546*61046927SAndroid Build Coastguard Worker
1547*61046927SAndroid Build Coastguard Worker ra_interval_init(interval, dst);
1548*61046927SAndroid Build Coastguard Worker interval->physreg_start = physreg;
1549*61046927SAndroid Build Coastguard Worker interval->physreg_end = physreg + reg_size(dst);
1550*61046927SAndroid Build Coastguard Worker }
1551*61046927SAndroid Build Coastguard Worker
1552*61046927SAndroid Build Coastguard Worker /* If a tied destination interferes with its source register, we have to insert
1553*61046927SAndroid Build Coastguard Worker * a copy beforehand to copy the source to the destination. Because we are using
1554*61046927SAndroid Build Coastguard Worker * the parallel_copies array and not creating a separate copy, this copy will
1555*61046927SAndroid Build Coastguard Worker * happen in parallel with any shuffling around of the tied source, so we have
1556*61046927SAndroid Build Coastguard Worker * to copy the source *as it exists before it is shuffled around*. We do this by
1557*61046927SAndroid Build Coastguard Worker * inserting the copy early, before any other copies are inserted. We don't
1558*61046927SAndroid Build Coastguard Worker * actually know the destination of the copy, but that's ok because the
1559*61046927SAndroid Build Coastguard Worker * dst_interval will be filled out later.
1560*61046927SAndroid Build Coastguard Worker */
1561*61046927SAndroid Build Coastguard Worker static void
insert_tied_dst_copy(struct ra_ctx * ctx,struct ir3_register * dst)1562*61046927SAndroid Build Coastguard Worker insert_tied_dst_copy(struct ra_ctx *ctx, struct ir3_register *dst)
1563*61046927SAndroid Build Coastguard Worker {
1564*61046927SAndroid Build Coastguard Worker struct ir3_register *tied = dst->tied;
1565*61046927SAndroid Build Coastguard Worker
1566*61046927SAndroid Build Coastguard Worker if (!tied)
1567*61046927SAndroid Build Coastguard Worker return;
1568*61046927SAndroid Build Coastguard Worker
1569*61046927SAndroid Build Coastguard Worker struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1570*61046927SAndroid Build Coastguard Worker struct ra_interval *dst_interval = &ctx->intervals[dst->name];
1571*61046927SAndroid Build Coastguard Worker
1572*61046927SAndroid Build Coastguard Worker if (tied_interval->is_killed)
1573*61046927SAndroid Build Coastguard Worker return;
1574*61046927SAndroid Build Coastguard Worker
1575*61046927SAndroid Build Coastguard Worker physreg_t tied_physreg = ra_interval_get_physreg(tied_interval);
1576*61046927SAndroid Build Coastguard Worker
1577*61046927SAndroid Build Coastguard Worker array_insert(ctx, ctx->parallel_copies,
1578*61046927SAndroid Build Coastguard Worker (struct ra_parallel_copy){
1579*61046927SAndroid Build Coastguard Worker .interval = dst_interval,
1580*61046927SAndroid Build Coastguard Worker .src = tied_physreg,
1581*61046927SAndroid Build Coastguard Worker });
1582*61046927SAndroid Build Coastguard Worker }
1583*61046927SAndroid Build Coastguard Worker
1584*61046927SAndroid Build Coastguard Worker static void
allocate_dst(struct ra_ctx * ctx,struct ir3_register * dst)1585*61046927SAndroid Build Coastguard Worker allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst)
1586*61046927SAndroid Build Coastguard Worker {
1587*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, dst);
1588*61046927SAndroid Build Coastguard Worker
1589*61046927SAndroid Build Coastguard Worker struct ir3_register *tied = dst->tied;
1590*61046927SAndroid Build Coastguard Worker if (tied) {
1591*61046927SAndroid Build Coastguard Worker struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1592*61046927SAndroid Build Coastguard Worker if (tied_interval->is_killed) {
1593*61046927SAndroid Build Coastguard Worker /* The easy case: the source is killed, so we can just reuse it
1594*61046927SAndroid Build Coastguard Worker * for the destination.
1595*61046927SAndroid Build Coastguard Worker */
1596*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(ctx, dst, ra_interval_get_physreg(tied_interval));
1597*61046927SAndroid Build Coastguard Worker return;
1598*61046927SAndroid Build Coastguard Worker }
1599*61046927SAndroid Build Coastguard Worker }
1600*61046927SAndroid Build Coastguard Worker
1601*61046927SAndroid Build Coastguard Worker /* All the hard work is done by get_reg here. */
1602*61046927SAndroid Build Coastguard Worker physreg_t physreg = get_reg(ctx, file, dst);
1603*61046927SAndroid Build Coastguard Worker
1604*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(ctx, dst, physreg);
1605*61046927SAndroid Build Coastguard Worker }
1606*61046927SAndroid Build Coastguard Worker
1607*61046927SAndroid Build Coastguard Worker static void
assign_src(struct ra_ctx * ctx,struct ir3_instruction * instr,struct ir3_register * src)1608*61046927SAndroid Build Coastguard Worker assign_src(struct ra_ctx *ctx, struct ir3_instruction *instr,
1609*61046927SAndroid Build Coastguard Worker struct ir3_register *src)
1610*61046927SAndroid Build Coastguard Worker {
1611*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[src->def->name];
1612*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, src);
1613*61046927SAndroid Build Coastguard Worker
1614*61046927SAndroid Build Coastguard Worker struct ir3_register *tied = src->tied;
1615*61046927SAndroid Build Coastguard Worker physreg_t physreg;
1616*61046927SAndroid Build Coastguard Worker if (tied) {
1617*61046927SAndroid Build Coastguard Worker struct ra_interval *tied_interval = &ctx->intervals[tied->name];
1618*61046927SAndroid Build Coastguard Worker physreg = ra_interval_get_physreg(tied_interval);
1619*61046927SAndroid Build Coastguard Worker } else {
1620*61046927SAndroid Build Coastguard Worker physreg = ra_interval_get_physreg(interval);
1621*61046927SAndroid Build Coastguard Worker }
1622*61046927SAndroid Build Coastguard Worker
1623*61046927SAndroid Build Coastguard Worker assign_reg(instr, src, ra_physreg_to_num(physreg, src->flags));
1624*61046927SAndroid Build Coastguard Worker
1625*61046927SAndroid Build Coastguard Worker if (src->flags & IR3_REG_FIRST_KILL)
1626*61046927SAndroid Build Coastguard Worker ra_file_remove(file, interval);
1627*61046927SAndroid Build Coastguard Worker }
1628*61046927SAndroid Build Coastguard Worker
1629*61046927SAndroid Build Coastguard Worker /* Insert a parallel copy instruction before the instruction with the parallel
1630*61046927SAndroid Build Coastguard Worker * copy entries we've built up.
1631*61046927SAndroid Build Coastguard Worker */
1632*61046927SAndroid Build Coastguard Worker static void
insert_parallel_copy_instr(struct ra_ctx * ctx,struct ir3_instruction * instr)1633*61046927SAndroid Build Coastguard Worker insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
1634*61046927SAndroid Build Coastguard Worker {
1635*61046927SAndroid Build Coastguard Worker if (ctx->parallel_copies_count == 0)
1636*61046927SAndroid Build Coastguard Worker return;
1637*61046927SAndroid Build Coastguard Worker
1638*61046927SAndroid Build Coastguard Worker struct ir3_instruction *pcopy =
1639*61046927SAndroid Build Coastguard Worker ir3_instr_create(instr->block, OPC_META_PARALLEL_COPY,
1640*61046927SAndroid Build Coastguard Worker ctx->parallel_copies_count, ctx->parallel_copies_count);
1641*61046927SAndroid Build Coastguard Worker
1642*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
1643*61046927SAndroid Build Coastguard Worker struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
1644*61046927SAndroid Build Coastguard Worker struct ir3_register *reg =
1645*61046927SAndroid Build Coastguard Worker ir3_dst_create(pcopy, INVALID_REG,
1646*61046927SAndroid Build Coastguard Worker entry->interval->interval.reg->flags &
1647*61046927SAndroid Build Coastguard Worker (IR3_REG_HALF | IR3_REG_ARRAY | IR3_REG_SHARED));
1648*61046927SAndroid Build Coastguard Worker reg->size = entry->interval->interval.reg->size;
1649*61046927SAndroid Build Coastguard Worker reg->wrmask = entry->interval->interval.reg->wrmask;
1650*61046927SAndroid Build Coastguard Worker assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
1651*61046927SAndroid Build Coastguard Worker }
1652*61046927SAndroid Build Coastguard Worker
1653*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
1654*61046927SAndroid Build Coastguard Worker struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
1655*61046927SAndroid Build Coastguard Worker struct ir3_register *reg =
1656*61046927SAndroid Build Coastguard Worker ir3_src_create(pcopy, INVALID_REG,
1657*61046927SAndroid Build Coastguard Worker entry->interval->interval.reg->flags &
1658*61046927SAndroid Build Coastguard Worker (IR3_REG_HALF | IR3_REG_ARRAY | IR3_REG_SHARED));
1659*61046927SAndroid Build Coastguard Worker reg->size = entry->interval->interval.reg->size;
1660*61046927SAndroid Build Coastguard Worker reg->wrmask = entry->interval->interval.reg->wrmask;
1661*61046927SAndroid Build Coastguard Worker assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
1662*61046927SAndroid Build Coastguard Worker }
1663*61046927SAndroid Build Coastguard Worker
1664*61046927SAndroid Build Coastguard Worker list_del(&pcopy->node);
1665*61046927SAndroid Build Coastguard Worker list_addtail(&pcopy->node, &instr->node);
1666*61046927SAndroid Build Coastguard Worker ctx->parallel_copies_count = 0;
1667*61046927SAndroid Build Coastguard Worker }
1668*61046927SAndroid Build Coastguard Worker
1669*61046927SAndroid Build Coastguard Worker static void
handle_normal_instr(struct ra_ctx * ctx,struct ir3_instruction * instr)1670*61046927SAndroid Build Coastguard Worker handle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
1671*61046927SAndroid Build Coastguard Worker {
1672*61046927SAndroid Build Coastguard Worker /* First, mark sources as going-to-be-killed while allocating the dest. */
1673*61046927SAndroid Build Coastguard Worker ra_foreach_src (src, instr) {
1674*61046927SAndroid Build Coastguard Worker mark_src_killed(ctx, src);
1675*61046927SAndroid Build Coastguard Worker }
1676*61046927SAndroid Build Coastguard Worker
1677*61046927SAndroid Build Coastguard Worker /* Pre-insert tied dst copies. */
1678*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
1679*61046927SAndroid Build Coastguard Worker insert_tied_dst_copy(ctx, dst);
1680*61046927SAndroid Build Coastguard Worker }
1681*61046927SAndroid Build Coastguard Worker
1682*61046927SAndroid Build Coastguard Worker /* Allocate the destination. */
1683*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
1684*61046927SAndroid Build Coastguard Worker allocate_dst(ctx, dst);
1685*61046927SAndroid Build Coastguard Worker }
1686*61046927SAndroid Build Coastguard Worker
1687*61046927SAndroid Build Coastguard Worker /* Now handle sources. Go backward so that in case there are multiple
1688*61046927SAndroid Build Coastguard Worker * sources with the same def and that def is killed we only remove it at
1689*61046927SAndroid Build Coastguard Worker * the end.
1690*61046927SAndroid Build Coastguard Worker */
1691*61046927SAndroid Build Coastguard Worker ra_foreach_src_rev (src, instr) {
1692*61046927SAndroid Build Coastguard Worker assign_src(ctx, instr, src);
1693*61046927SAndroid Build Coastguard Worker }
1694*61046927SAndroid Build Coastguard Worker
1695*61046927SAndroid Build Coastguard Worker /* Now finally insert the destination into the map. */
1696*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
1697*61046927SAndroid Build Coastguard Worker insert_dst(ctx, dst);
1698*61046927SAndroid Build Coastguard Worker }
1699*61046927SAndroid Build Coastguard Worker
1700*61046927SAndroid Build Coastguard Worker insert_parallel_copy_instr(ctx, instr);
1701*61046927SAndroid Build Coastguard Worker }
1702*61046927SAndroid Build Coastguard Worker
1703*61046927SAndroid Build Coastguard Worker static void
handle_split(struct ra_ctx * ctx,struct ir3_instruction * instr)1704*61046927SAndroid Build Coastguard Worker handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
1705*61046927SAndroid Build Coastguard Worker {
1706*61046927SAndroid Build Coastguard Worker struct ir3_register *dst = instr->dsts[0];
1707*61046927SAndroid Build Coastguard Worker struct ir3_register *src = instr->srcs[0];
1708*61046927SAndroid Build Coastguard Worker
1709*61046927SAndroid Build Coastguard Worker if (!(dst->flags & IR3_REG_SSA))
1710*61046927SAndroid Build Coastguard Worker return;
1711*61046927SAndroid Build Coastguard Worker
1712*61046927SAndroid Build Coastguard Worker if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) {
1713*61046927SAndroid Build Coastguard Worker handle_normal_instr(ctx, instr);
1714*61046927SAndroid Build Coastguard Worker return;
1715*61046927SAndroid Build Coastguard Worker }
1716*61046927SAndroid Build Coastguard Worker
1717*61046927SAndroid Build Coastguard Worker struct ra_interval *src_interval = &ctx->intervals[src->def->name];
1718*61046927SAndroid Build Coastguard Worker
1719*61046927SAndroid Build Coastguard Worker physreg_t physreg = ra_interval_get_physreg(src_interval);
1720*61046927SAndroid Build Coastguard Worker assign_src(ctx, instr, src);
1721*61046927SAndroid Build Coastguard Worker
1722*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(
1723*61046927SAndroid Build Coastguard Worker ctx, dst, physreg - src->def->merge_set_offset + dst->merge_set_offset);
1724*61046927SAndroid Build Coastguard Worker insert_dst(ctx, dst);
1725*61046927SAndroid Build Coastguard Worker }
1726*61046927SAndroid Build Coastguard Worker
1727*61046927SAndroid Build Coastguard Worker static void
handle_collect(struct ra_ctx * ctx,struct ir3_instruction * instr)1728*61046927SAndroid Build Coastguard Worker handle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr)
1729*61046927SAndroid Build Coastguard Worker {
1730*61046927SAndroid Build Coastguard Worker if (!(instr->dsts[0]->flags & IR3_REG_SSA))
1731*61046927SAndroid Build Coastguard Worker return;
1732*61046927SAndroid Build Coastguard Worker
1733*61046927SAndroid Build Coastguard Worker struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set;
1734*61046927SAndroid Build Coastguard Worker unsigned dst_offset = instr->dsts[0]->merge_set_offset;
1735*61046927SAndroid Build Coastguard Worker
1736*61046927SAndroid Build Coastguard Worker if (!dst_set || dst_set->regs_count == 1) {
1737*61046927SAndroid Build Coastguard Worker handle_normal_instr(ctx, instr);
1738*61046927SAndroid Build Coastguard Worker return;
1739*61046927SAndroid Build Coastguard Worker }
1740*61046927SAndroid Build Coastguard Worker
1741*61046927SAndroid Build Coastguard Worker /* We need to check if any of the sources are contained in an interval
1742*61046927SAndroid Build Coastguard Worker * that is at least as large as the vector. In this case, we should put
1743*61046927SAndroid Build Coastguard Worker * the vector inside that larger interval. (There should be one
1744*61046927SAndroid Build Coastguard Worker * unambiguous place to put it, because values sharing the same merge set
1745*61046927SAndroid Build Coastguard Worker * should be allocated together.) This can happen in a case like:
1746*61046927SAndroid Build Coastguard Worker *
1747*61046927SAndroid Build Coastguard Worker * ssa_1 (wrmask=0xf) = ...
1748*61046927SAndroid Build Coastguard Worker * ssa_2 = split ssa_1 off:0
1749*61046927SAndroid Build Coastguard Worker * ssa_3 = split ssa_1 off:1
1750*61046927SAndroid Build Coastguard Worker * ssa_4 (wrmask=0x3) = collect (kill)ssa_2, (kill)ssa_3
1751*61046927SAndroid Build Coastguard Worker * ... = (kill)ssa_1
1752*61046927SAndroid Build Coastguard Worker * ... = (kill)ssa_4
1753*61046927SAndroid Build Coastguard Worker *
1754*61046927SAndroid Build Coastguard Worker * ssa_4 will be coalesced with ssa_1 and needs to be allocated inside it.
1755*61046927SAndroid Build Coastguard Worker */
1756*61046927SAndroid Build Coastguard Worker physreg_t dst_fixed = (physreg_t)~0u;
1757*61046927SAndroid Build Coastguard Worker
1758*61046927SAndroid Build Coastguard Worker ra_foreach_src (src, instr) {
1759*61046927SAndroid Build Coastguard Worker if (src->flags & IR3_REG_FIRST_KILL) {
1760*61046927SAndroid Build Coastguard Worker mark_src_killed(ctx, src);
1761*61046927SAndroid Build Coastguard Worker }
1762*61046927SAndroid Build Coastguard Worker
1763*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[src->def->name];
1764*61046927SAndroid Build Coastguard Worker
1765*61046927SAndroid Build Coastguard Worker /* We only need special handling if the source's interval overlaps with
1766*61046927SAndroid Build Coastguard Worker * the destination's interval.
1767*61046927SAndroid Build Coastguard Worker */
1768*61046927SAndroid Build Coastguard Worker if (src->def->interval_start >= instr->dsts[0]->interval_end ||
1769*61046927SAndroid Build Coastguard Worker instr->dsts[0]->interval_start >= src->def->interval_end ||
1770*61046927SAndroid Build Coastguard Worker interval->is_killed)
1771*61046927SAndroid Build Coastguard Worker continue;
1772*61046927SAndroid Build Coastguard Worker
1773*61046927SAndroid Build Coastguard Worker while (interval->interval.parent != NULL) {
1774*61046927SAndroid Build Coastguard Worker interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
1775*61046927SAndroid Build Coastguard Worker }
1776*61046927SAndroid Build Coastguard Worker if (reg_size(interval->interval.reg) >= reg_size(instr->dsts[0])) {
1777*61046927SAndroid Build Coastguard Worker dst_fixed = interval->physreg_start -
1778*61046927SAndroid Build Coastguard Worker interval->interval.reg->merge_set_offset + dst_offset;
1779*61046927SAndroid Build Coastguard Worker } else {
1780*61046927SAndroid Build Coastguard Worker /* For sources whose root interval is smaller than the
1781*61046927SAndroid Build Coastguard Worker * destination (i.e. the normal case), we will shuffle them
1782*61046927SAndroid Build Coastguard Worker * around after allocating the destination. Mark them killed so
1783*61046927SAndroid Build Coastguard Worker * that the destination can be allocated over them, even if they
1784*61046927SAndroid Build Coastguard Worker * aren't actually killed.
1785*61046927SAndroid Build Coastguard Worker */
1786*61046927SAndroid Build Coastguard Worker ra_file_mark_killed(ra_get_file(ctx, src), interval);
1787*61046927SAndroid Build Coastguard Worker }
1788*61046927SAndroid Build Coastguard Worker }
1789*61046927SAndroid Build Coastguard Worker
1790*61046927SAndroid Build Coastguard Worker if (dst_fixed != (physreg_t)~0u)
1791*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(ctx, instr->dsts[0], dst_fixed);
1792*61046927SAndroid Build Coastguard Worker else
1793*61046927SAndroid Build Coastguard Worker allocate_dst(ctx, instr->dsts[0]);
1794*61046927SAndroid Build Coastguard Worker
1795*61046927SAndroid Build Coastguard Worker /* Remove the temporary is_killed we added */
1796*61046927SAndroid Build Coastguard Worker ra_foreach_src (src, instr) {
1797*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[src->def->name];
1798*61046927SAndroid Build Coastguard Worker while (interval->interval.parent != NULL) {
1799*61046927SAndroid Build Coastguard Worker interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
1800*61046927SAndroid Build Coastguard Worker }
1801*61046927SAndroid Build Coastguard Worker
1802*61046927SAndroid Build Coastguard Worker /* Filter out cases where it actually should be killed */
1803*61046927SAndroid Build Coastguard Worker if (interval != &ctx->intervals[src->def->name] ||
1804*61046927SAndroid Build Coastguard Worker !(src->flags & IR3_REG_KILL)) {
1805*61046927SAndroid Build Coastguard Worker ra_file_unmark_killed(ra_get_file(ctx, src), interval);
1806*61046927SAndroid Build Coastguard Worker }
1807*61046927SAndroid Build Coastguard Worker }
1808*61046927SAndroid Build Coastguard Worker
1809*61046927SAndroid Build Coastguard Worker ra_foreach_src_rev (src, instr) {
1810*61046927SAndroid Build Coastguard Worker assign_src(ctx, instr, src);
1811*61046927SAndroid Build Coastguard Worker }
1812*61046927SAndroid Build Coastguard Worker
1813*61046927SAndroid Build Coastguard Worker /* We need to do this before insert_dst(), so that children of the
1814*61046927SAndroid Build Coastguard Worker * destination which got marked as killed and then shuffled around to make
1815*61046927SAndroid Build Coastguard Worker * space for the destination have the correct pcopy destination that
1816*61046927SAndroid Build Coastguard Worker * matches what we assign the source of the collect to in assign_src().
1817*61046927SAndroid Build Coastguard Worker *
1818*61046927SAndroid Build Coastguard Worker * TODO: In this case we'll wind up copying the value in the pcopy and
1819*61046927SAndroid Build Coastguard Worker * then again in the collect. We could avoid one of those by updating the
1820*61046927SAndroid Build Coastguard Worker * pcopy destination to match up with the final location of the source
1821*61046927SAndroid Build Coastguard Worker * after the collect and making the collect a no-op. However this doesn't
1822*61046927SAndroid Build Coastguard Worker * seem to happen often.
1823*61046927SAndroid Build Coastguard Worker */
1824*61046927SAndroid Build Coastguard Worker insert_parallel_copy_instr(ctx, instr);
1825*61046927SAndroid Build Coastguard Worker
1826*61046927SAndroid Build Coastguard Worker /* Note: insert_dst will automatically shuffle around any intervals that
1827*61046927SAndroid Build Coastguard Worker * are a child of the collect by making them children of the collect.
1828*61046927SAndroid Build Coastguard Worker */
1829*61046927SAndroid Build Coastguard Worker
1830*61046927SAndroid Build Coastguard Worker insert_dst(ctx, instr->dsts[0]);
1831*61046927SAndroid Build Coastguard Worker }
1832*61046927SAndroid Build Coastguard Worker
1833*61046927SAndroid Build Coastguard Worker /* Parallel copies before RA should only be at the end of the block, for
1834*61046927SAndroid Build Coastguard Worker * phi's. For these we only need to fill in the sources, and then we fill in
1835*61046927SAndroid Build Coastguard Worker * the destinations in the successor block.
1836*61046927SAndroid Build Coastguard Worker */
1837*61046927SAndroid Build Coastguard Worker static void
handle_pcopy(struct ra_ctx * ctx,struct ir3_instruction * instr)1838*61046927SAndroid Build Coastguard Worker handle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr)
1839*61046927SAndroid Build Coastguard Worker {
1840*61046927SAndroid Build Coastguard Worker ra_foreach_src_rev (src, instr) {
1841*61046927SAndroid Build Coastguard Worker assign_src(ctx, instr, src);
1842*61046927SAndroid Build Coastguard Worker }
1843*61046927SAndroid Build Coastguard Worker }
1844*61046927SAndroid Build Coastguard Worker
1845*61046927SAndroid Build Coastguard Worker /* Some inputs may need to be precolored. We need to handle those first, so
1846*61046927SAndroid Build Coastguard Worker * that other non-precolored inputs don't accidentally get allocated over
1847*61046927SAndroid Build Coastguard Worker * them. Inputs are the very first thing in the shader, so it shouldn't be a
1848*61046927SAndroid Build Coastguard Worker * problem to allocate them to a specific physreg.
1849*61046927SAndroid Build Coastguard Worker */
1850*61046927SAndroid Build Coastguard Worker
1851*61046927SAndroid Build Coastguard Worker static void
handle_precolored_input(struct ra_ctx * ctx,struct ir3_instruction * instr)1852*61046927SAndroid Build Coastguard Worker handle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1853*61046927SAndroid Build Coastguard Worker {
1854*61046927SAndroid Build Coastguard Worker if (instr->dsts[0]->num == INVALID_REG ||
1855*61046927SAndroid Build Coastguard Worker !(instr->dsts[0]->flags & IR3_REG_SSA))
1856*61046927SAndroid Build Coastguard Worker return;
1857*61046927SAndroid Build Coastguard Worker
1858*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1859*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1860*61046927SAndroid Build Coastguard Worker physreg_t physreg = ra_reg_get_physreg(instr->dsts[0]);
1861*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(ctx, instr->dsts[0], physreg);
1862*61046927SAndroid Build Coastguard Worker
1863*61046927SAndroid Build Coastguard Worker d("insert precolored dst %u physreg %u", instr->dsts[0]->name,
1864*61046927SAndroid Build Coastguard Worker ra_interval_get_physreg(interval));
1865*61046927SAndroid Build Coastguard Worker
1866*61046927SAndroid Build Coastguard Worker ra_file_insert(file, interval);
1867*61046927SAndroid Build Coastguard Worker interval->frozen = true;
1868*61046927SAndroid Build Coastguard Worker }
1869*61046927SAndroid Build Coastguard Worker
1870*61046927SAndroid Build Coastguard Worker static void
handle_input(struct ra_ctx * ctx,struct ir3_instruction * instr)1871*61046927SAndroid Build Coastguard Worker handle_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1872*61046927SAndroid Build Coastguard Worker {
1873*61046927SAndroid Build Coastguard Worker if (instr->dsts[0]->num != INVALID_REG)
1874*61046927SAndroid Build Coastguard Worker return;
1875*61046927SAndroid Build Coastguard Worker
1876*61046927SAndroid Build Coastguard Worker allocate_dst(ctx, instr->dsts[0]);
1877*61046927SAndroid Build Coastguard Worker
1878*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1879*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1880*61046927SAndroid Build Coastguard Worker ra_file_insert(file, interval);
1881*61046927SAndroid Build Coastguard Worker }
1882*61046927SAndroid Build Coastguard Worker
1883*61046927SAndroid Build Coastguard Worker static void
assign_input(struct ra_ctx * ctx,struct ir3_instruction * instr)1884*61046927SAndroid Build Coastguard Worker assign_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1885*61046927SAndroid Build Coastguard Worker {
1886*61046927SAndroid Build Coastguard Worker if (!(instr->dsts[0]->flags & IR3_REG_SSA))
1887*61046927SAndroid Build Coastguard Worker return;
1888*61046927SAndroid Build Coastguard Worker
1889*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1890*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1891*61046927SAndroid Build Coastguard Worker
1892*61046927SAndroid Build Coastguard Worker if (instr->dsts[0]->num == INVALID_REG) {
1893*61046927SAndroid Build Coastguard Worker assign_reg(instr, instr->dsts[0], ra_interval_get_num(interval));
1894*61046927SAndroid Build Coastguard Worker } else {
1895*61046927SAndroid Build Coastguard Worker interval->frozen = false;
1896*61046927SAndroid Build Coastguard Worker }
1897*61046927SAndroid Build Coastguard Worker
1898*61046927SAndroid Build Coastguard Worker if (instr->dsts[0]->flags & IR3_REG_UNUSED)
1899*61046927SAndroid Build Coastguard Worker ra_file_remove(file, interval);
1900*61046927SAndroid Build Coastguard Worker
1901*61046927SAndroid Build Coastguard Worker ra_foreach_src_rev (src, instr)
1902*61046927SAndroid Build Coastguard Worker assign_src(ctx, instr, src);
1903*61046927SAndroid Build Coastguard Worker }
1904*61046927SAndroid Build Coastguard Worker
1905*61046927SAndroid Build Coastguard Worker /* chmask is a bit weird, because it has pre-colored sources due to the need
1906*61046927SAndroid Build Coastguard Worker * to pass some registers to the next stage. Fortunately there are only at
1907*61046927SAndroid Build Coastguard Worker * most two, and there should be no other live values by the time we get to
1908*61046927SAndroid Build Coastguard Worker * this instruction, so we only have to do the minimum and don't need any
1909*61046927SAndroid Build Coastguard Worker * fancy fallbacks.
1910*61046927SAndroid Build Coastguard Worker *
1911*61046927SAndroid Build Coastguard Worker * TODO: Add more complete handling of precolored sources, e.g. for function
1912*61046927SAndroid Build Coastguard Worker * argument handling. We'd need a way to mark sources as fixed so that they
1913*61046927SAndroid Build Coastguard Worker * don't get moved around when placing other sources in the fallback case, and
1914*61046927SAndroid Build Coastguard Worker * a duplication of much of the logic in get_reg(). This also opens another
1915*61046927SAndroid Build Coastguard Worker * can of worms, e.g. what if the precolored source is a split of a vector
1916*61046927SAndroid Build Coastguard Worker * which is still live -- this breaks our assumption that splits don't incur
1917*61046927SAndroid Build Coastguard Worker * any "extra" register requirements and we'd have to break it out of the
1918*61046927SAndroid Build Coastguard Worker * parent ra_interval.
1919*61046927SAndroid Build Coastguard Worker */
1920*61046927SAndroid Build Coastguard Worker
1921*61046927SAndroid Build Coastguard Worker static void
handle_precolored_source(struct ra_ctx * ctx,struct ir3_register * src)1922*61046927SAndroid Build Coastguard Worker handle_precolored_source(struct ra_ctx *ctx, struct ir3_register *src)
1923*61046927SAndroid Build Coastguard Worker {
1924*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, src);
1925*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[src->def->name];
1926*61046927SAndroid Build Coastguard Worker physreg_t physreg = ra_reg_get_physreg(src);
1927*61046927SAndroid Build Coastguard Worker
1928*61046927SAndroid Build Coastguard Worker if (ra_interval_get_num(interval) == src->num)
1929*61046927SAndroid Build Coastguard Worker return;
1930*61046927SAndroid Build Coastguard Worker
1931*61046927SAndroid Build Coastguard Worker /* Try evicting stuff in our way if it isn't free. This won't move
1932*61046927SAndroid Build Coastguard Worker * anything unless it overlaps with our precolored physreg, so we don't
1933*61046927SAndroid Build Coastguard Worker * have to worry about evicting other precolored sources.
1934*61046927SAndroid Build Coastguard Worker */
1935*61046927SAndroid Build Coastguard Worker if (!get_reg_specified(ctx, file, src, physreg, true)) {
1936*61046927SAndroid Build Coastguard Worker unsigned eviction_count;
1937*61046927SAndroid Build Coastguard Worker if (!try_evict_regs(ctx, file, src, physreg, &eviction_count, true,
1938*61046927SAndroid Build Coastguard Worker false)) {
1939*61046927SAndroid Build Coastguard Worker unreachable("failed to evict for precolored source!");
1940*61046927SAndroid Build Coastguard Worker return;
1941*61046927SAndroid Build Coastguard Worker }
1942*61046927SAndroid Build Coastguard Worker }
1943*61046927SAndroid Build Coastguard Worker
1944*61046927SAndroid Build Coastguard Worker ra_move_interval(ctx, file, interval, physreg);
1945*61046927SAndroid Build Coastguard Worker }
1946*61046927SAndroid Build Coastguard Worker
1947*61046927SAndroid Build Coastguard Worker static void
handle_chmask(struct ra_ctx * ctx,struct ir3_instruction * instr)1948*61046927SAndroid Build Coastguard Worker handle_chmask(struct ra_ctx *ctx, struct ir3_instruction *instr)
1949*61046927SAndroid Build Coastguard Worker {
1950*61046927SAndroid Build Coastguard Worker /* Note: we purposely don't mark sources as killed, so that we can reuse
1951*61046927SAndroid Build Coastguard Worker * some of the get_reg() machinery as-if the source is a destination.
1952*61046927SAndroid Build Coastguard Worker * Marking it as killed would make e.g. get_reg_specified() wouldn't work
1953*61046927SAndroid Build Coastguard Worker * correctly.
1954*61046927SAndroid Build Coastguard Worker */
1955*61046927SAndroid Build Coastguard Worker ra_foreach_src (src, instr) {
1956*61046927SAndroid Build Coastguard Worker assert(src->num != INVALID_REG);
1957*61046927SAndroid Build Coastguard Worker handle_precolored_source(ctx, src);
1958*61046927SAndroid Build Coastguard Worker }
1959*61046927SAndroid Build Coastguard Worker
1960*61046927SAndroid Build Coastguard Worker ra_foreach_src (src, instr) {
1961*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, src);
1962*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[src->def->name];
1963*61046927SAndroid Build Coastguard Worker if (src->flags & IR3_REG_FIRST_KILL)
1964*61046927SAndroid Build Coastguard Worker ra_file_remove(file, interval);
1965*61046927SAndroid Build Coastguard Worker }
1966*61046927SAndroid Build Coastguard Worker
1967*61046927SAndroid Build Coastguard Worker insert_parallel_copy_instr(ctx, instr);
1968*61046927SAndroid Build Coastguard Worker }
1969*61046927SAndroid Build Coastguard Worker
1970*61046927SAndroid Build Coastguard Worker static physreg_t
read_register(struct ra_ctx * ctx,struct ir3_block * block,struct ir3_register * def)1971*61046927SAndroid Build Coastguard Worker read_register(struct ra_ctx *ctx, struct ir3_block *block,
1972*61046927SAndroid Build Coastguard Worker struct ir3_register *def)
1973*61046927SAndroid Build Coastguard Worker {
1974*61046927SAndroid Build Coastguard Worker struct ra_block_state *state = &ctx->blocks[block->index];
1975*61046927SAndroid Build Coastguard Worker if (state->renames) {
1976*61046927SAndroid Build Coastguard Worker struct hash_entry *entry = _mesa_hash_table_search(state->renames, def);
1977*61046927SAndroid Build Coastguard Worker if (entry) {
1978*61046927SAndroid Build Coastguard Worker return (physreg_t)(uintptr_t)entry->data;
1979*61046927SAndroid Build Coastguard Worker }
1980*61046927SAndroid Build Coastguard Worker }
1981*61046927SAndroid Build Coastguard Worker
1982*61046927SAndroid Build Coastguard Worker return ra_reg_get_physreg(def);
1983*61046927SAndroid Build Coastguard Worker }
1984*61046927SAndroid Build Coastguard Worker
1985*61046927SAndroid Build Coastguard Worker static void
handle_live_in(struct ra_ctx * ctx,struct ir3_register * def)1986*61046927SAndroid Build Coastguard Worker handle_live_in(struct ra_ctx *ctx, struct ir3_register *def)
1987*61046927SAndroid Build Coastguard Worker {
1988*61046927SAndroid Build Coastguard Worker physreg_t physreg = ~0;
1989*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
1990*61046927SAndroid Build Coastguard Worker struct ir3_block *pred = ctx->block->predecessors[i];
1991*61046927SAndroid Build Coastguard Worker struct ra_block_state *pred_state = &ctx->blocks[pred->index];
1992*61046927SAndroid Build Coastguard Worker
1993*61046927SAndroid Build Coastguard Worker if (!pred_state->visited)
1994*61046927SAndroid Build Coastguard Worker continue;
1995*61046927SAndroid Build Coastguard Worker
1996*61046927SAndroid Build Coastguard Worker physreg = read_register(ctx, pred, def);
1997*61046927SAndroid Build Coastguard Worker break;
1998*61046927SAndroid Build Coastguard Worker }
1999*61046927SAndroid Build Coastguard Worker
2000*61046927SAndroid Build Coastguard Worker assert(physreg != (physreg_t)~0);
2001*61046927SAndroid Build Coastguard Worker
2002*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[def->name];
2003*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, def);
2004*61046927SAndroid Build Coastguard Worker ra_interval_init(interval, def);
2005*61046927SAndroid Build Coastguard Worker interval->physreg_start = physreg;
2006*61046927SAndroid Build Coastguard Worker interval->physreg_end = physreg + reg_size(def);
2007*61046927SAndroid Build Coastguard Worker ra_file_insert(file, interval);
2008*61046927SAndroid Build Coastguard Worker }
2009*61046927SAndroid Build Coastguard Worker
2010*61046927SAndroid Build Coastguard Worker static void
handle_live_out(struct ra_ctx * ctx,struct ir3_register * def)2011*61046927SAndroid Build Coastguard Worker handle_live_out(struct ra_ctx *ctx, struct ir3_register *def)
2012*61046927SAndroid Build Coastguard Worker {
2013*61046927SAndroid Build Coastguard Worker /* Skip parallelcopy's which in the original program are only used as phi
2014*61046927SAndroid Build Coastguard Worker * arguments. Even though phi arguments are live out, they are only
2015*61046927SAndroid Build Coastguard Worker * assigned when the phi is.
2016*61046927SAndroid Build Coastguard Worker */
2017*61046927SAndroid Build Coastguard Worker if (def->instr->opc == OPC_META_PARALLEL_COPY)
2018*61046927SAndroid Build Coastguard Worker return;
2019*61046927SAndroid Build Coastguard Worker
2020*61046927SAndroid Build Coastguard Worker struct ra_block_state *state = &ctx->blocks[ctx->block->index];
2021*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[def->name];
2022*61046927SAndroid Build Coastguard Worker physreg_t physreg = ra_interval_get_physreg(interval);
2023*61046927SAndroid Build Coastguard Worker if (physreg != ra_reg_get_physreg(def)) {
2024*61046927SAndroid Build Coastguard Worker if (!state->renames)
2025*61046927SAndroid Build Coastguard Worker state->renames = _mesa_pointer_hash_table_create(ctx);
2026*61046927SAndroid Build Coastguard Worker _mesa_hash_table_insert(state->renames, def, (void *)(uintptr_t)physreg);
2027*61046927SAndroid Build Coastguard Worker }
2028*61046927SAndroid Build Coastguard Worker }
2029*61046927SAndroid Build Coastguard Worker
2030*61046927SAndroid Build Coastguard Worker static void
handle_phi(struct ra_ctx * ctx,struct ir3_register * def)2031*61046927SAndroid Build Coastguard Worker handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
2032*61046927SAndroid Build Coastguard Worker {
2033*61046927SAndroid Build Coastguard Worker if (!(def->flags & IR3_REG_SSA))
2034*61046927SAndroid Build Coastguard Worker return;
2035*61046927SAndroid Build Coastguard Worker
2036*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, def);
2037*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[def->name];
2038*61046927SAndroid Build Coastguard Worker
2039*61046927SAndroid Build Coastguard Worker /* phis are always scalar, so they should already be the smallest possible
2040*61046927SAndroid Build Coastguard Worker * size. However they may be coalesced with other live-in values/phi
2041*61046927SAndroid Build Coastguard Worker * nodes, so check for that here.
2042*61046927SAndroid Build Coastguard Worker */
2043*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *parent_ir3 =
2044*61046927SAndroid Build Coastguard Worker ir3_reg_interval_search(&file->reg_ctx.intervals, def->interval_start);
2045*61046927SAndroid Build Coastguard Worker physreg_t physreg;
2046*61046927SAndroid Build Coastguard Worker if (parent_ir3) {
2047*61046927SAndroid Build Coastguard Worker struct ra_interval *parent = ir3_reg_interval_to_ra_interval(parent_ir3);
2048*61046927SAndroid Build Coastguard Worker physreg = ra_interval_get_physreg(parent) +
2049*61046927SAndroid Build Coastguard Worker (def->interval_start - parent_ir3->reg->interval_start);
2050*61046927SAndroid Build Coastguard Worker } else {
2051*61046927SAndroid Build Coastguard Worker physreg = get_reg(ctx, file, def);
2052*61046927SAndroid Build Coastguard Worker }
2053*61046927SAndroid Build Coastguard Worker
2054*61046927SAndroid Build Coastguard Worker allocate_dst_fixed(ctx, def, physreg);
2055*61046927SAndroid Build Coastguard Worker
2056*61046927SAndroid Build Coastguard Worker ra_file_insert(file, interval);
2057*61046927SAndroid Build Coastguard Worker }
2058*61046927SAndroid Build Coastguard Worker
2059*61046927SAndroid Build Coastguard Worker static void
assign_phi(struct ra_ctx * ctx,struct ir3_instruction * phi)2060*61046927SAndroid Build Coastguard Worker assign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi)
2061*61046927SAndroid Build Coastguard Worker {
2062*61046927SAndroid Build Coastguard Worker if (!(phi->dsts[0]->flags & IR3_REG_SSA))
2063*61046927SAndroid Build Coastguard Worker return;
2064*61046927SAndroid Build Coastguard Worker
2065*61046927SAndroid Build Coastguard Worker struct ra_file *file = ra_get_file(ctx, phi->dsts[0]);
2066*61046927SAndroid Build Coastguard Worker struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name];
2067*61046927SAndroid Build Coastguard Worker assert(!interval->interval.parent);
2068*61046927SAndroid Build Coastguard Worker unsigned num = ra_interval_get_num(interval);
2069*61046927SAndroid Build Coastguard Worker assign_reg(phi, phi->dsts[0], num);
2070*61046927SAndroid Build Coastguard Worker
2071*61046927SAndroid Build Coastguard Worker /* Assign the parallelcopy sources of this phi */
2072*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < phi->srcs_count; i++) {
2073*61046927SAndroid Build Coastguard Worker if (phi->srcs[i]->def) {
2074*61046927SAndroid Build Coastguard Worker assign_reg(phi, phi->srcs[i], num);
2075*61046927SAndroid Build Coastguard Worker assign_reg(phi, phi->srcs[i]->def, num);
2076*61046927SAndroid Build Coastguard Worker }
2077*61046927SAndroid Build Coastguard Worker }
2078*61046927SAndroid Build Coastguard Worker
2079*61046927SAndroid Build Coastguard Worker if (phi->dsts[0]->flags & IR3_REG_UNUSED)
2080*61046927SAndroid Build Coastguard Worker ra_file_remove(file, interval);
2081*61046927SAndroid Build Coastguard Worker }
2082*61046927SAndroid Build Coastguard Worker
2083*61046927SAndroid Build Coastguard Worker /* When we split a live range, we sometimes need to emit fixup code at the end
2084*61046927SAndroid Build Coastguard Worker * of a block. For example, something like:
2085*61046927SAndroid Build Coastguard Worker *
2086*61046927SAndroid Build Coastguard Worker * a = ...
2087*61046927SAndroid Build Coastguard Worker * if (...) {
2088*61046927SAndroid Build Coastguard Worker * ...
2089*61046927SAndroid Build Coastguard Worker * a' = a
2090*61046927SAndroid Build Coastguard Worker * b = ... // a evicted to make room for b
2091*61046927SAndroid Build Coastguard Worker * ...
2092*61046927SAndroid Build Coastguard Worker * }
2093*61046927SAndroid Build Coastguard Worker * ... = a
2094*61046927SAndroid Build Coastguard Worker *
2095*61046927SAndroid Build Coastguard Worker * When we insert the copy to a' in insert_parallel_copy_instr(), this forces
2096*61046927SAndroid Build Coastguard Worker * to insert another copy "a = a'" at the end of the if. Normally this would
2097*61046927SAndroid Build Coastguard Worker * also entail adding a phi node, but since we're about to go out of SSA
2098*61046927SAndroid Build Coastguard Worker * anyway we just insert an extra move. Note, however, that "b" might be used
2099*61046927SAndroid Build Coastguard Worker * in a phi node at the end of the if and share registers with "a", so we
2100*61046927SAndroid Build Coastguard Worker * have to be careful to extend any preexisting parallelcopy instruction
2101*61046927SAndroid Build Coastguard Worker * instead of creating our own in order to guarantee that they properly get
2102*61046927SAndroid Build Coastguard Worker * swapped.
2103*61046927SAndroid Build Coastguard Worker */
2104*61046927SAndroid Build Coastguard Worker
2105*61046927SAndroid Build Coastguard Worker static void
insert_liveout_copy(struct ir3_block * block,physreg_t dst,physreg_t src,struct ir3_register * reg)2106*61046927SAndroid Build Coastguard Worker insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
2107*61046927SAndroid Build Coastguard Worker struct ir3_register *reg)
2108*61046927SAndroid Build Coastguard Worker {
2109*61046927SAndroid Build Coastguard Worker struct ir3_instruction *old_pcopy = NULL;
2110*61046927SAndroid Build Coastguard Worker struct ir3_instruction *last = ir3_block_get_last_non_terminator(block);
2111*61046927SAndroid Build Coastguard Worker
2112*61046927SAndroid Build Coastguard Worker if (last && last->opc == OPC_META_PARALLEL_COPY)
2113*61046927SAndroid Build Coastguard Worker old_pcopy = last;
2114*61046927SAndroid Build Coastguard Worker
2115*61046927SAndroid Build Coastguard Worker unsigned old_pcopy_srcs = old_pcopy ? old_pcopy->srcs_count : 0;
2116*61046927SAndroid Build Coastguard Worker struct ir3_instruction *pcopy = ir3_instr_create(
2117*61046927SAndroid Build Coastguard Worker block, OPC_META_PARALLEL_COPY, old_pcopy_srcs + 1, old_pcopy_srcs + 1);
2118*61046927SAndroid Build Coastguard Worker
2119*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < old_pcopy_srcs; i++) {
2120*61046927SAndroid Build Coastguard Worker old_pcopy->dsts[i]->instr = pcopy;
2121*61046927SAndroid Build Coastguard Worker pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i];
2122*61046927SAndroid Build Coastguard Worker }
2123*61046927SAndroid Build Coastguard Worker
2124*61046927SAndroid Build Coastguard Worker unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
2125*61046927SAndroid Build Coastguard Worker
2126*61046927SAndroid Build Coastguard Worker struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags);
2127*61046927SAndroid Build Coastguard Worker dst_reg->wrmask = reg->wrmask;
2128*61046927SAndroid Build Coastguard Worker dst_reg->size = reg->size;
2129*61046927SAndroid Build Coastguard Worker assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
2130*61046927SAndroid Build Coastguard Worker
2131*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < old_pcopy_srcs; i++) {
2132*61046927SAndroid Build Coastguard Worker pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i];
2133*61046927SAndroid Build Coastguard Worker }
2134*61046927SAndroid Build Coastguard Worker
2135*61046927SAndroid Build Coastguard Worker struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags);
2136*61046927SAndroid Build Coastguard Worker src_reg->wrmask = reg->wrmask;
2137*61046927SAndroid Build Coastguard Worker src_reg->size = reg->size;
2138*61046927SAndroid Build Coastguard Worker assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
2139*61046927SAndroid Build Coastguard Worker
2140*61046927SAndroid Build Coastguard Worker if (old_pcopy)
2141*61046927SAndroid Build Coastguard Worker list_del(&old_pcopy->node);
2142*61046927SAndroid Build Coastguard Worker }
2143*61046927SAndroid Build Coastguard Worker
2144*61046927SAndroid Build Coastguard Worker static void
insert_live_in_move(struct ra_ctx * ctx,struct ra_interval * interval)2145*61046927SAndroid Build Coastguard Worker insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
2146*61046927SAndroid Build Coastguard Worker {
2147*61046927SAndroid Build Coastguard Worker physreg_t physreg = ra_interval_get_physreg(interval);
2148*61046927SAndroid Build Coastguard Worker
2149*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
2150*61046927SAndroid Build Coastguard Worker struct ir3_block *pred = ctx->block->predecessors[i];
2151*61046927SAndroid Build Coastguard Worker struct ra_block_state *pred_state = &ctx->blocks[pred->index];
2152*61046927SAndroid Build Coastguard Worker
2153*61046927SAndroid Build Coastguard Worker if (!pred_state->visited)
2154*61046927SAndroid Build Coastguard Worker continue;
2155*61046927SAndroid Build Coastguard Worker
2156*61046927SAndroid Build Coastguard Worker physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
2157*61046927SAndroid Build Coastguard Worker if (pred_reg != physreg) {
2158*61046927SAndroid Build Coastguard Worker assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
2159*61046927SAndroid Build Coastguard Worker insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
2160*61046927SAndroid Build Coastguard Worker }
2161*61046927SAndroid Build Coastguard Worker }
2162*61046927SAndroid Build Coastguard Worker }
2163*61046927SAndroid Build Coastguard Worker
2164*61046927SAndroid Build Coastguard Worker static void
insert_file_live_in_moves(struct ra_ctx * ctx,struct ra_file * file)2165*61046927SAndroid Build Coastguard Worker insert_file_live_in_moves(struct ra_ctx *ctx, struct ra_file *file)
2166*61046927SAndroid Build Coastguard Worker {
2167*61046927SAndroid Build Coastguard Worker BITSET_WORD *live_in = ctx->live->live_in[ctx->block->index];
2168*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2169*61046927SAndroid Build Coastguard Worker physreg_node) {
2170*61046927SAndroid Build Coastguard Worker /* Skip phi nodes. This needs to happen after phi nodes are allocated,
2171*61046927SAndroid Build Coastguard Worker * because we may have to move live-ins around to make space for phi
2172*61046927SAndroid Build Coastguard Worker * nodes, but we shouldn't be handling phi nodes here.
2173*61046927SAndroid Build Coastguard Worker */
2174*61046927SAndroid Build Coastguard Worker if (BITSET_TEST(live_in, interval->interval.reg->name))
2175*61046927SAndroid Build Coastguard Worker insert_live_in_move(ctx, interval);
2176*61046927SAndroid Build Coastguard Worker }
2177*61046927SAndroid Build Coastguard Worker }
2178*61046927SAndroid Build Coastguard Worker
2179*61046927SAndroid Build Coastguard Worker static void
insert_entry_regs(struct ra_block_state * state,struct ra_file * file)2180*61046927SAndroid Build Coastguard Worker insert_entry_regs(struct ra_block_state *state, struct ra_file *file)
2181*61046927SAndroid Build Coastguard Worker {
2182*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2183*61046927SAndroid Build Coastguard Worker physreg_node) {
2184*61046927SAndroid Build Coastguard Worker _mesa_hash_table_insert(state->entry_regs, interval->interval.reg,
2185*61046927SAndroid Build Coastguard Worker (void *)(uintptr_t)interval->physreg_start);
2186*61046927SAndroid Build Coastguard Worker }
2187*61046927SAndroid Build Coastguard Worker }
2188*61046927SAndroid Build Coastguard Worker
2189*61046927SAndroid Build Coastguard Worker static void
insert_live_in_moves(struct ra_ctx * ctx)2190*61046927SAndroid Build Coastguard Worker insert_live_in_moves(struct ra_ctx *ctx)
2191*61046927SAndroid Build Coastguard Worker {
2192*61046927SAndroid Build Coastguard Worker insert_file_live_in_moves(ctx, &ctx->full);
2193*61046927SAndroid Build Coastguard Worker insert_file_live_in_moves(ctx, &ctx->half);
2194*61046927SAndroid Build Coastguard Worker insert_file_live_in_moves(ctx, &ctx->shared);
2195*61046927SAndroid Build Coastguard Worker
2196*61046927SAndroid Build Coastguard Worker /* If not all predecessors are visited, insert live-in regs so that
2197*61046927SAndroid Build Coastguard Worker * insert_live_out_moves() will work.
2198*61046927SAndroid Build Coastguard Worker */
2199*61046927SAndroid Build Coastguard Worker bool all_preds_visited = true;
2200*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
2201*61046927SAndroid Build Coastguard Worker if (!ctx->blocks[ctx->block->predecessors[i]->index].visited) {
2202*61046927SAndroid Build Coastguard Worker all_preds_visited = false;
2203*61046927SAndroid Build Coastguard Worker break;
2204*61046927SAndroid Build Coastguard Worker }
2205*61046927SAndroid Build Coastguard Worker }
2206*61046927SAndroid Build Coastguard Worker
2207*61046927SAndroid Build Coastguard Worker if (!all_preds_visited) {
2208*61046927SAndroid Build Coastguard Worker struct ra_block_state *state = &ctx->blocks[ctx->block->index];
2209*61046927SAndroid Build Coastguard Worker state->entry_regs = _mesa_pointer_hash_table_create(ctx);
2210*61046927SAndroid Build Coastguard Worker
2211*61046927SAndroid Build Coastguard Worker insert_entry_regs(state, &ctx->full);
2212*61046927SAndroid Build Coastguard Worker insert_entry_regs(state, &ctx->half);
2213*61046927SAndroid Build Coastguard Worker insert_entry_regs(state, &ctx->shared);
2214*61046927SAndroid Build Coastguard Worker }
2215*61046927SAndroid Build Coastguard Worker }
2216*61046927SAndroid Build Coastguard Worker
2217*61046927SAndroid Build Coastguard Worker static void
insert_live_out_move(struct ra_ctx * ctx,struct ra_interval * interval)2218*61046927SAndroid Build Coastguard Worker insert_live_out_move(struct ra_ctx *ctx, struct ra_interval *interval)
2219*61046927SAndroid Build Coastguard Worker {
2220*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < 2; i++) {
2221*61046927SAndroid Build Coastguard Worker if (!ctx->block->successors[i])
2222*61046927SAndroid Build Coastguard Worker continue;
2223*61046927SAndroid Build Coastguard Worker
2224*61046927SAndroid Build Coastguard Worker struct ir3_block *succ = ctx->block->successors[i];
2225*61046927SAndroid Build Coastguard Worker struct ra_block_state *succ_state = &ctx->blocks[succ->index];
2226*61046927SAndroid Build Coastguard Worker
2227*61046927SAndroid Build Coastguard Worker if (!succ_state->visited)
2228*61046927SAndroid Build Coastguard Worker continue;
2229*61046927SAndroid Build Coastguard Worker
2230*61046927SAndroid Build Coastguard Worker struct hash_entry *entry = _mesa_hash_table_search(
2231*61046927SAndroid Build Coastguard Worker succ_state->entry_regs, interval->interval.reg);
2232*61046927SAndroid Build Coastguard Worker if (!entry)
2233*61046927SAndroid Build Coastguard Worker continue;
2234*61046927SAndroid Build Coastguard Worker
2235*61046927SAndroid Build Coastguard Worker physreg_t new_reg = (physreg_t)(uintptr_t)entry->data;
2236*61046927SAndroid Build Coastguard Worker if (new_reg != interval->physreg_start) {
2237*61046927SAndroid Build Coastguard Worker insert_liveout_copy(ctx->block, new_reg, interval->physreg_start,
2238*61046927SAndroid Build Coastguard Worker interval->interval.reg);
2239*61046927SAndroid Build Coastguard Worker }
2240*61046927SAndroid Build Coastguard Worker }
2241*61046927SAndroid Build Coastguard Worker }
2242*61046927SAndroid Build Coastguard Worker
2243*61046927SAndroid Build Coastguard Worker static void
insert_file_live_out_moves(struct ra_ctx * ctx,struct ra_file * file)2244*61046927SAndroid Build Coastguard Worker insert_file_live_out_moves(struct ra_ctx *ctx, struct ra_file *file)
2245*61046927SAndroid Build Coastguard Worker {
2246*61046927SAndroid Build Coastguard Worker rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2247*61046927SAndroid Build Coastguard Worker physreg_node) {
2248*61046927SAndroid Build Coastguard Worker insert_live_out_move(ctx, interval);
2249*61046927SAndroid Build Coastguard Worker }
2250*61046927SAndroid Build Coastguard Worker }
2251*61046927SAndroid Build Coastguard Worker
2252*61046927SAndroid Build Coastguard Worker static void
insert_live_out_moves(struct ra_ctx * ctx)2253*61046927SAndroid Build Coastguard Worker insert_live_out_moves(struct ra_ctx *ctx)
2254*61046927SAndroid Build Coastguard Worker {
2255*61046927SAndroid Build Coastguard Worker insert_file_live_out_moves(ctx, &ctx->full);
2256*61046927SAndroid Build Coastguard Worker insert_file_live_out_moves(ctx, &ctx->half);
2257*61046927SAndroid Build Coastguard Worker insert_file_live_out_moves(ctx, &ctx->shared);
2258*61046927SAndroid Build Coastguard Worker }
2259*61046927SAndroid Build Coastguard Worker
2260*61046927SAndroid Build Coastguard Worker static void
handle_block(struct ra_ctx * ctx,struct ir3_block * block)2261*61046927SAndroid Build Coastguard Worker handle_block(struct ra_ctx *ctx, struct ir3_block *block)
2262*61046927SAndroid Build Coastguard Worker {
2263*61046927SAndroid Build Coastguard Worker ctx->block = block;
2264*61046927SAndroid Build Coastguard Worker
2265*61046927SAndroid Build Coastguard Worker /* Reset the register files from the last block */
2266*61046927SAndroid Build Coastguard Worker ra_file_init(&ctx->full);
2267*61046927SAndroid Build Coastguard Worker ra_file_init(&ctx->half);
2268*61046927SAndroid Build Coastguard Worker ra_file_init(&ctx->shared);
2269*61046927SAndroid Build Coastguard Worker
2270*61046927SAndroid Build Coastguard Worker /* Handle live-ins, phis, and input meta-instructions. These all appear
2271*61046927SAndroid Build Coastguard Worker * live at the beginning of the block, and interfere with each other
2272*61046927SAndroid Build Coastguard Worker * therefore need to be allocated "in parallel". This means that we
2273*61046927SAndroid Build Coastguard Worker * have to allocate all of them, inserting them into the file, and then
2274*61046927SAndroid Build Coastguard Worker * delay updating the IR until all of them are allocated.
2275*61046927SAndroid Build Coastguard Worker *
2276*61046927SAndroid Build Coastguard Worker * Handle precolored inputs first, because we need to make sure that other
2277*61046927SAndroid Build Coastguard Worker * inputs don't overwrite them. We shouldn't have both live-ins/phi nodes
2278*61046927SAndroid Build Coastguard Worker * and inputs at the same time, because the first block doesn't have
2279*61046927SAndroid Build Coastguard Worker * predecessors. Therefore handle_live_in doesn't have to worry about
2280*61046927SAndroid Build Coastguard Worker * them.
2281*61046927SAndroid Build Coastguard Worker */
2282*61046927SAndroid Build Coastguard Worker
2283*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
2284*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_INPUT)
2285*61046927SAndroid Build Coastguard Worker handle_precolored_input(ctx, instr);
2286*61046927SAndroid Build Coastguard Worker else
2287*61046927SAndroid Build Coastguard Worker break;
2288*61046927SAndroid Build Coastguard Worker }
2289*61046927SAndroid Build Coastguard Worker
2290*61046927SAndroid Build Coastguard Worker unsigned name;
2291*61046927SAndroid Build Coastguard Worker BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
2292*61046927SAndroid Build Coastguard Worker ctx->live->definitions_count) {
2293*61046927SAndroid Build Coastguard Worker struct ir3_register *reg = ctx->live->definitions[name];
2294*61046927SAndroid Build Coastguard Worker handle_live_in(ctx, reg);
2295*61046927SAndroid Build Coastguard Worker }
2296*61046927SAndroid Build Coastguard Worker
2297*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
2298*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_PHI)
2299*61046927SAndroid Build Coastguard Worker handle_phi(ctx, instr->dsts[0]);
2300*61046927SAndroid Build Coastguard Worker else if (instr->opc == OPC_META_INPUT ||
2301*61046927SAndroid Build Coastguard Worker instr->opc == OPC_META_TEX_PREFETCH)
2302*61046927SAndroid Build Coastguard Worker handle_input(ctx, instr);
2303*61046927SAndroid Build Coastguard Worker else
2304*61046927SAndroid Build Coastguard Worker break;
2305*61046927SAndroid Build Coastguard Worker }
2306*61046927SAndroid Build Coastguard Worker
2307*61046927SAndroid Build Coastguard Worker /* After this point, every live-in/phi/input has an interval assigned to
2308*61046927SAndroid Build Coastguard Worker * it. We delay actually assigning values until everything has been
2309*61046927SAndroid Build Coastguard Worker * allocated, so we can simply ignore any parallel copy entries created
2310*61046927SAndroid Build Coastguard Worker * when shuffling them around.
2311*61046927SAndroid Build Coastguard Worker */
2312*61046927SAndroid Build Coastguard Worker ctx->parallel_copies_count = 0;
2313*61046927SAndroid Build Coastguard Worker
2314*61046927SAndroid Build Coastguard Worker insert_live_in_moves(ctx);
2315*61046927SAndroid Build Coastguard Worker
2316*61046927SAndroid Build Coastguard Worker if (RA_DEBUG) {
2317*61046927SAndroid Build Coastguard Worker d("after live-in block %u:\n", block->index);
2318*61046927SAndroid Build Coastguard Worker ra_ctx_dump(ctx);
2319*61046927SAndroid Build Coastguard Worker }
2320*61046927SAndroid Build Coastguard Worker
2321*61046927SAndroid Build Coastguard Worker /* Now we're done with processing live-ins, and can handle the body of the
2322*61046927SAndroid Build Coastguard Worker * block.
2323*61046927SAndroid Build Coastguard Worker */
2324*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
2325*61046927SAndroid Build Coastguard Worker di(instr, "processing");
2326*61046927SAndroid Build Coastguard Worker
2327*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_PHI)
2328*61046927SAndroid Build Coastguard Worker assign_phi(ctx, instr);
2329*61046927SAndroid Build Coastguard Worker else if (instr->opc == OPC_META_INPUT ||
2330*61046927SAndroid Build Coastguard Worker instr->opc == OPC_META_TEX_PREFETCH)
2331*61046927SAndroid Build Coastguard Worker assign_input(ctx, instr);
2332*61046927SAndroid Build Coastguard Worker else if (instr->opc == OPC_META_SPLIT)
2333*61046927SAndroid Build Coastguard Worker handle_split(ctx, instr);
2334*61046927SAndroid Build Coastguard Worker else if (instr->opc == OPC_META_COLLECT)
2335*61046927SAndroid Build Coastguard Worker handle_collect(ctx, instr);
2336*61046927SAndroid Build Coastguard Worker else if (instr->opc == OPC_META_PARALLEL_COPY)
2337*61046927SAndroid Build Coastguard Worker handle_pcopy(ctx, instr);
2338*61046927SAndroid Build Coastguard Worker else if (instr->opc == OPC_CHMASK)
2339*61046927SAndroid Build Coastguard Worker handle_chmask(ctx, instr);
2340*61046927SAndroid Build Coastguard Worker else
2341*61046927SAndroid Build Coastguard Worker handle_normal_instr(ctx, instr);
2342*61046927SAndroid Build Coastguard Worker
2343*61046927SAndroid Build Coastguard Worker if (RA_DEBUG)
2344*61046927SAndroid Build Coastguard Worker ra_ctx_dump(ctx);
2345*61046927SAndroid Build Coastguard Worker }
2346*61046927SAndroid Build Coastguard Worker
2347*61046927SAndroid Build Coastguard Worker insert_live_out_moves(ctx);
2348*61046927SAndroid Build Coastguard Worker
2349*61046927SAndroid Build Coastguard Worker BITSET_FOREACH_SET (name, ctx->live->live_out[block->index],
2350*61046927SAndroid Build Coastguard Worker ctx->live->definitions_count) {
2351*61046927SAndroid Build Coastguard Worker struct ir3_register *reg = ctx->live->definitions[name];
2352*61046927SAndroid Build Coastguard Worker handle_live_out(ctx, reg);
2353*61046927SAndroid Build Coastguard Worker }
2354*61046927SAndroid Build Coastguard Worker
2355*61046927SAndroid Build Coastguard Worker ctx->blocks[block->index].visited = true;
2356*61046927SAndroid Build Coastguard Worker }
2357*61046927SAndroid Build Coastguard Worker
2358*61046927SAndroid Build Coastguard Worker static unsigned
calc_target_full_pressure(struct ir3_shader_variant * v,unsigned pressure)2359*61046927SAndroid Build Coastguard Worker calc_target_full_pressure(struct ir3_shader_variant *v, unsigned pressure)
2360*61046927SAndroid Build Coastguard Worker {
2361*61046927SAndroid Build Coastguard Worker /* Registers are allocated in units of vec4, so switch from units of
2362*61046927SAndroid Build Coastguard Worker * half-regs to vec4.
2363*61046927SAndroid Build Coastguard Worker */
2364*61046927SAndroid Build Coastguard Worker unsigned reg_count = DIV_ROUND_UP(pressure, 2 * 4);
2365*61046927SAndroid Build Coastguard Worker
2366*61046927SAndroid Build Coastguard Worker bool double_threadsize = ir3_should_double_threadsize(v, reg_count);
2367*61046927SAndroid Build Coastguard Worker
2368*61046927SAndroid Build Coastguard Worker unsigned target = reg_count;
2369*61046927SAndroid Build Coastguard Worker unsigned reg_independent_max_waves =
2370*61046927SAndroid Build Coastguard Worker ir3_get_reg_independent_max_waves(v, double_threadsize);
2371*61046927SAndroid Build Coastguard Worker unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
2372*61046927SAndroid Build Coastguard Worker v->compiler, reg_count, double_threadsize);
2373*61046927SAndroid Build Coastguard Worker unsigned target_waves =
2374*61046927SAndroid Build Coastguard Worker MIN2(reg_independent_max_waves, reg_dependent_max_waves);
2375*61046927SAndroid Build Coastguard Worker
2376*61046927SAndroid Build Coastguard Worker while (target <= RA_FULL_SIZE / (2 * 4) &&
2377*61046927SAndroid Build Coastguard Worker ir3_should_double_threadsize(v, target) == double_threadsize &&
2378*61046927SAndroid Build Coastguard Worker ir3_get_reg_dependent_max_waves(v->compiler, target,
2379*61046927SAndroid Build Coastguard Worker double_threadsize) >= target_waves)
2380*61046927SAndroid Build Coastguard Worker target++;
2381*61046927SAndroid Build Coastguard Worker
2382*61046927SAndroid Build Coastguard Worker return (target - 1) * 2 * 4;
2383*61046927SAndroid Build Coastguard Worker }
2384*61046927SAndroid Build Coastguard Worker
2385*61046927SAndroid Build Coastguard Worker static void
add_pressure(struct ir3_pressure * pressure,struct ir3_register * reg,bool merged_regs)2386*61046927SAndroid Build Coastguard Worker add_pressure(struct ir3_pressure *pressure, struct ir3_register *reg,
2387*61046927SAndroid Build Coastguard Worker bool merged_regs)
2388*61046927SAndroid Build Coastguard Worker {
2389*61046927SAndroid Build Coastguard Worker unsigned size = reg_size(reg);
2390*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_HALF)
2391*61046927SAndroid Build Coastguard Worker pressure->half += size;
2392*61046927SAndroid Build Coastguard Worker if (!(reg->flags & IR3_REG_HALF) || merged_regs)
2393*61046927SAndroid Build Coastguard Worker pressure->full += size;
2394*61046927SAndroid Build Coastguard Worker }
2395*61046927SAndroid Build Coastguard Worker
2396*61046927SAndroid Build Coastguard Worker static void
dummy_interval_add(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)2397*61046927SAndroid Build Coastguard Worker dummy_interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
2398*61046927SAndroid Build Coastguard Worker {
2399*61046927SAndroid Build Coastguard Worker }
2400*61046927SAndroid Build Coastguard Worker
2401*61046927SAndroid Build Coastguard Worker static void
dummy_interval_delete(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * interval)2402*61046927SAndroid Build Coastguard Worker dummy_interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
2403*61046927SAndroid Build Coastguard Worker {
2404*61046927SAndroid Build Coastguard Worker }
2405*61046927SAndroid Build Coastguard Worker
2406*61046927SAndroid Build Coastguard Worker static void
dummy_interval_readd(struct ir3_reg_ctx * ctx,struct ir3_reg_interval * parent,struct ir3_reg_interval * child)2407*61046927SAndroid Build Coastguard Worker dummy_interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *parent,
2408*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *child)
2409*61046927SAndroid Build Coastguard Worker {
2410*61046927SAndroid Build Coastguard Worker }
2411*61046927SAndroid Build Coastguard Worker
2412*61046927SAndroid Build Coastguard Worker /* Calculate the minimum possible limit on register pressure so that spilling
2413*61046927SAndroid Build Coastguard Worker * still succeeds. Used to implement IR3_SHADER_DEBUG=spillall.
2414*61046927SAndroid Build Coastguard Worker */
2415*61046927SAndroid Build Coastguard Worker
2416*61046927SAndroid Build Coastguard Worker static void
calc_min_limit_pressure(struct ir3_shader_variant * v,struct ir3_liveness * live,struct ir3_pressure * limit)2417*61046927SAndroid Build Coastguard Worker calc_min_limit_pressure(struct ir3_shader_variant *v,
2418*61046927SAndroid Build Coastguard Worker struct ir3_liveness *live,
2419*61046927SAndroid Build Coastguard Worker struct ir3_pressure *limit)
2420*61046927SAndroid Build Coastguard Worker {
2421*61046927SAndroid Build Coastguard Worker struct ir3_block *start = ir3_start_block(v->ir);
2422*61046927SAndroid Build Coastguard Worker struct ir3_reg_ctx *ctx = ralloc(NULL, struct ir3_reg_ctx);
2423*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *intervals =
2424*61046927SAndroid Build Coastguard Worker rzalloc_array(ctx, struct ir3_reg_interval, live->definitions_count);
2425*61046927SAndroid Build Coastguard Worker
2426*61046927SAndroid Build Coastguard Worker ctx->interval_add = dummy_interval_add;
2427*61046927SAndroid Build Coastguard Worker ctx->interval_delete = dummy_interval_delete;
2428*61046927SAndroid Build Coastguard Worker ctx->interval_readd = dummy_interval_readd;
2429*61046927SAndroid Build Coastguard Worker
2430*61046927SAndroid Build Coastguard Worker limit->full = limit->half = 0;
2431*61046927SAndroid Build Coastguard Worker
2432*61046927SAndroid Build Coastguard Worker struct ir3_pressure cur_pressure = {0};
2433*61046927SAndroid Build Coastguard Worker foreach_instr (input, &start->instr_list) {
2434*61046927SAndroid Build Coastguard Worker if (input->opc != OPC_META_INPUT &&
2435*61046927SAndroid Build Coastguard Worker input->opc != OPC_META_TEX_PREFETCH)
2436*61046927SAndroid Build Coastguard Worker break;
2437*61046927SAndroid Build Coastguard Worker
2438*61046927SAndroid Build Coastguard Worker add_pressure(&cur_pressure, input->dsts[0], v->mergedregs);
2439*61046927SAndroid Build Coastguard Worker }
2440*61046927SAndroid Build Coastguard Worker
2441*61046927SAndroid Build Coastguard Worker limit->full = MAX2(limit->full, cur_pressure.full);
2442*61046927SAndroid Build Coastguard Worker limit->half = MAX2(limit->half, cur_pressure.half);
2443*61046927SAndroid Build Coastguard Worker
2444*61046927SAndroid Build Coastguard Worker foreach_instr (input, &start->instr_list) {
2445*61046927SAndroid Build Coastguard Worker if (input->opc != OPC_META_INPUT &&
2446*61046927SAndroid Build Coastguard Worker input->opc != OPC_META_TEX_PREFETCH)
2447*61046927SAndroid Build Coastguard Worker break;
2448*61046927SAndroid Build Coastguard Worker
2449*61046927SAndroid Build Coastguard Worker /* pre-colored inputs may have holes, which increases the pressure. */
2450*61046927SAndroid Build Coastguard Worker struct ir3_register *dst = input->dsts[0];
2451*61046927SAndroid Build Coastguard Worker if (dst->num != INVALID_REG) {
2452*61046927SAndroid Build Coastguard Worker unsigned physreg = ra_reg_get_physreg(dst) + reg_size(dst);
2453*61046927SAndroid Build Coastguard Worker if (dst->flags & IR3_REG_HALF)
2454*61046927SAndroid Build Coastguard Worker limit->half = MAX2(limit->half, physreg);
2455*61046927SAndroid Build Coastguard Worker if (!(dst->flags & IR3_REG_HALF) || v->mergedregs)
2456*61046927SAndroid Build Coastguard Worker limit->full = MAX2(limit->full, physreg);
2457*61046927SAndroid Build Coastguard Worker }
2458*61046927SAndroid Build Coastguard Worker }
2459*61046927SAndroid Build Coastguard Worker
2460*61046927SAndroid Build Coastguard Worker foreach_block (block, &v->ir->block_list) {
2461*61046927SAndroid Build Coastguard Worker rb_tree_init(&ctx->intervals);
2462*61046927SAndroid Build Coastguard Worker
2463*61046927SAndroid Build Coastguard Worker unsigned name;
2464*61046927SAndroid Build Coastguard Worker BITSET_FOREACH_SET (name, live->live_in[block->index],
2465*61046927SAndroid Build Coastguard Worker live->definitions_count) {
2466*61046927SAndroid Build Coastguard Worker struct ir3_register *reg = live->definitions[name];
2467*61046927SAndroid Build Coastguard Worker ir3_reg_interval_init(&intervals[reg->name], reg);
2468*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert(ctx, &intervals[reg->name]);
2469*61046927SAndroid Build Coastguard Worker }
2470*61046927SAndroid Build Coastguard Worker
2471*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
2472*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
2473*61046927SAndroid Build Coastguard Worker ir3_reg_interval_init(&intervals[dst->name], dst);
2474*61046927SAndroid Build Coastguard Worker }
2475*61046927SAndroid Build Coastguard Worker /* phis and parallel copies can be deleted via spilling */
2476*61046927SAndroid Build Coastguard Worker
2477*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_PHI) {
2478*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert(ctx, &intervals[instr->dsts[0]->name]);
2479*61046927SAndroid Build Coastguard Worker continue;
2480*61046927SAndroid Build Coastguard Worker }
2481*61046927SAndroid Build Coastguard Worker
2482*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_PARALLEL_COPY)
2483*61046927SAndroid Build Coastguard Worker continue;
2484*61046927SAndroid Build Coastguard Worker
2485*61046927SAndroid Build Coastguard Worker cur_pressure = (struct ir3_pressure) {0};
2486*61046927SAndroid Build Coastguard Worker
2487*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
2488*61046927SAndroid Build Coastguard Worker if ((dst->tied && !(dst->tied->flags & IR3_REG_KILL)) ||
2489*61046927SAndroid Build Coastguard Worker (dst->flags & IR3_REG_EARLY_CLOBBER))
2490*61046927SAndroid Build Coastguard Worker add_pressure(&cur_pressure, dst, v->mergedregs);
2491*61046927SAndroid Build Coastguard Worker }
2492*61046927SAndroid Build Coastguard Worker
2493*61046927SAndroid Build Coastguard Worker ra_foreach_src_rev (src, instr) {
2494*61046927SAndroid Build Coastguard Worker /* We currently don't support spilling the parent of a source when
2495*61046927SAndroid Build Coastguard Worker * making space for sources, so we have to keep track of the
2496*61046927SAndroid Build Coastguard Worker * intervals and figure out the root of the tree to figure out how
2497*61046927SAndroid Build Coastguard Worker * much space we need.
2498*61046927SAndroid Build Coastguard Worker *
2499*61046927SAndroid Build Coastguard Worker * TODO: We should probably support this in the spiller.
2500*61046927SAndroid Build Coastguard Worker */
2501*61046927SAndroid Build Coastguard Worker struct ir3_reg_interval *interval = &intervals[src->def->name];
2502*61046927SAndroid Build Coastguard Worker while (interval->parent)
2503*61046927SAndroid Build Coastguard Worker interval = interval->parent;
2504*61046927SAndroid Build Coastguard Worker add_pressure(&cur_pressure, interval->reg, v->mergedregs);
2505*61046927SAndroid Build Coastguard Worker
2506*61046927SAndroid Build Coastguard Worker if (src->flags & IR3_REG_FIRST_KILL)
2507*61046927SAndroid Build Coastguard Worker ir3_reg_interval_remove(ctx, &intervals[src->def->name]);
2508*61046927SAndroid Build Coastguard Worker }
2509*61046927SAndroid Build Coastguard Worker
2510*61046927SAndroid Build Coastguard Worker limit->full = MAX2(limit->full, cur_pressure.full);
2511*61046927SAndroid Build Coastguard Worker limit->half = MAX2(limit->half, cur_pressure.half);
2512*61046927SAndroid Build Coastguard Worker
2513*61046927SAndroid Build Coastguard Worker cur_pressure = (struct ir3_pressure) {0};
2514*61046927SAndroid Build Coastguard Worker
2515*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
2516*61046927SAndroid Build Coastguard Worker ir3_reg_interval_init(&intervals[dst->name], dst);
2517*61046927SAndroid Build Coastguard Worker ir3_reg_interval_insert(ctx, &intervals[dst->name]);
2518*61046927SAndroid Build Coastguard Worker add_pressure(&cur_pressure, dst, v->mergedregs);
2519*61046927SAndroid Build Coastguard Worker }
2520*61046927SAndroid Build Coastguard Worker
2521*61046927SAndroid Build Coastguard Worker limit->full = MAX2(limit->full, cur_pressure.full);
2522*61046927SAndroid Build Coastguard Worker limit->half = MAX2(limit->half, cur_pressure.half);
2523*61046927SAndroid Build Coastguard Worker }
2524*61046927SAndroid Build Coastguard Worker }
2525*61046927SAndroid Build Coastguard Worker
2526*61046927SAndroid Build Coastguard Worker /* Account for the base register, which needs to be available everywhere. */
2527*61046927SAndroid Build Coastguard Worker limit->full += 2;
2528*61046927SAndroid Build Coastguard Worker
2529*61046927SAndroid Build Coastguard Worker ralloc_free(ctx);
2530*61046927SAndroid Build Coastguard Worker }
2531*61046927SAndroid Build Coastguard Worker
2532*61046927SAndroid Build Coastguard Worker /*
2533*61046927SAndroid Build Coastguard Worker * If barriers are used, it must be possible for all waves in the workgroup
2534*61046927SAndroid Build Coastguard Worker * to execute concurrently. Thus we may have to reduce the registers limit.
2535*61046927SAndroid Build Coastguard Worker */
2536*61046927SAndroid Build Coastguard Worker static void
calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant * v,struct ir3_pressure * limit_pressure)2537*61046927SAndroid Build Coastguard Worker calc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v,
2538*61046927SAndroid Build Coastguard Worker struct ir3_pressure *limit_pressure)
2539*61046927SAndroid Build Coastguard Worker {
2540*61046927SAndroid Build Coastguard Worker const struct ir3_compiler *compiler = v->compiler;
2541*61046927SAndroid Build Coastguard Worker
2542*61046927SAndroid Build Coastguard Worker unsigned threads_per_wg;
2543*61046927SAndroid Build Coastguard Worker if (v->local_size_variable) {
2544*61046927SAndroid Build Coastguard Worker /* We have to expect the worst case. */
2545*61046927SAndroid Build Coastguard Worker threads_per_wg = compiler->max_variable_workgroup_size;
2546*61046927SAndroid Build Coastguard Worker } else {
2547*61046927SAndroid Build Coastguard Worker threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2];
2548*61046927SAndroid Build Coastguard Worker }
2549*61046927SAndroid Build Coastguard Worker
2550*61046927SAndroid Build Coastguard Worker /* The register file is grouped into reg_size_vec4 number of parts.
2551*61046927SAndroid Build Coastguard Worker * Each part has enough registers to add a single vec4 register to
2552*61046927SAndroid Build Coastguard Worker * each thread of a single-sized wave-pair. With double threadsize
2553*61046927SAndroid Build Coastguard Worker * each wave-pair would consume two parts of the register file to get
2554*61046927SAndroid Build Coastguard Worker * a single vec4 for a thread. The more active wave-pairs the less
2555*61046927SAndroid Build Coastguard Worker * parts each could get.
2556*61046927SAndroid Build Coastguard Worker */
2557*61046927SAndroid Build Coastguard Worker
2558*61046927SAndroid Build Coastguard Worker bool double_threadsize = ir3_should_double_threadsize(v, 0);
2559*61046927SAndroid Build Coastguard Worker unsigned waves_per_wg = DIV_ROUND_UP(
2560*61046927SAndroid Build Coastguard Worker threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) *
2561*61046927SAndroid Build Coastguard Worker compiler->wave_granularity);
2562*61046927SAndroid Build Coastguard Worker
2563*61046927SAndroid Build Coastguard Worker uint32_t vec4_regs_per_thread =
2564*61046927SAndroid Build Coastguard Worker compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1));
2565*61046927SAndroid Build Coastguard Worker assert(vec4_regs_per_thread > 0);
2566*61046927SAndroid Build Coastguard Worker
2567*61046927SAndroid Build Coastguard Worker uint32_t half_regs_per_thread = vec4_regs_per_thread * 4 * 2;
2568*61046927SAndroid Build Coastguard Worker
2569*61046927SAndroid Build Coastguard Worker if (limit_pressure->full > half_regs_per_thread) {
2570*61046927SAndroid Build Coastguard Worker if (v->mergedregs) {
2571*61046927SAndroid Build Coastguard Worker limit_pressure->full = half_regs_per_thread;
2572*61046927SAndroid Build Coastguard Worker } else {
2573*61046927SAndroid Build Coastguard Worker /* TODO: Handle !mergedregs case, probably we would have to do this
2574*61046927SAndroid Build Coastguard Worker * after the first register pressure pass.
2575*61046927SAndroid Build Coastguard Worker */
2576*61046927SAndroid Build Coastguard Worker }
2577*61046927SAndroid Build Coastguard Worker }
2578*61046927SAndroid Build Coastguard Worker }
2579*61046927SAndroid Build Coastguard Worker
2580*61046927SAndroid Build Coastguard Worker int
ir3_ra(struct ir3_shader_variant * v)2581*61046927SAndroid Build Coastguard Worker ir3_ra(struct ir3_shader_variant *v)
2582*61046927SAndroid Build Coastguard Worker {
2583*61046927SAndroid Build Coastguard Worker ir3_calc_dominance(v->ir);
2584*61046927SAndroid Build Coastguard Worker
2585*61046927SAndroid Build Coastguard Worker /* Predicate RA needs dominance. */
2586*61046927SAndroid Build Coastguard Worker ir3_ra_predicates(v);
2587*61046927SAndroid Build Coastguard Worker
2588*61046927SAndroid Build Coastguard Worker ir3_create_parallel_copies(v->ir);
2589*61046927SAndroid Build Coastguard Worker
2590*61046927SAndroid Build Coastguard Worker struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx);
2591*61046927SAndroid Build Coastguard Worker
2592*61046927SAndroid Build Coastguard Worker ctx->merged_regs = v->mergedregs;
2593*61046927SAndroid Build Coastguard Worker ctx->compiler = v->compiler;
2594*61046927SAndroid Build Coastguard Worker ctx->stage = v->type;
2595*61046927SAndroid Build Coastguard Worker
2596*61046927SAndroid Build Coastguard Worker struct ir3_liveness *live = ir3_calc_liveness(ctx, v->ir);
2597*61046927SAndroid Build Coastguard Worker
2598*61046927SAndroid Build Coastguard Worker ir3_debug_print(v->ir, "AFTER: create_parallel_copies");
2599*61046927SAndroid Build Coastguard Worker
2600*61046927SAndroid Build Coastguard Worker ir3_index_instrs_for_merge_sets(v->ir);
2601*61046927SAndroid Build Coastguard Worker ir3_merge_regs(live, v->ir);
2602*61046927SAndroid Build Coastguard Worker
2603*61046927SAndroid Build Coastguard Worker bool has_shared_vectors = false;
2604*61046927SAndroid Build Coastguard Worker foreach_block (block, &v->ir->block_list) {
2605*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
2606*61046927SAndroid Build Coastguard Worker ra_foreach_dst (dst, instr) {
2607*61046927SAndroid Build Coastguard Worker if ((dst->flags & IR3_REG_SHARED) && reg_elems(dst) > 1) {
2608*61046927SAndroid Build Coastguard Worker has_shared_vectors = true;
2609*61046927SAndroid Build Coastguard Worker break;
2610*61046927SAndroid Build Coastguard Worker }
2611*61046927SAndroid Build Coastguard Worker }
2612*61046927SAndroid Build Coastguard Worker }
2613*61046927SAndroid Build Coastguard Worker }
2614*61046927SAndroid Build Coastguard Worker
2615*61046927SAndroid Build Coastguard Worker struct ir3_pressure max_pressure;
2616*61046927SAndroid Build Coastguard Worker ir3_calc_pressure(v, live, &max_pressure);
2617*61046927SAndroid Build Coastguard Worker d("max pressure:");
2618*61046927SAndroid Build Coastguard Worker d("\tfull: %u", max_pressure.full);
2619*61046927SAndroid Build Coastguard Worker d("\thalf: %u", max_pressure.half);
2620*61046927SAndroid Build Coastguard Worker d("\tshared: %u", max_pressure.shared);
2621*61046927SAndroid Build Coastguard Worker
2622*61046927SAndroid Build Coastguard Worker struct ir3_pressure limit_pressure;
2623*61046927SAndroid Build Coastguard Worker limit_pressure.full = RA_FULL_SIZE;
2624*61046927SAndroid Build Coastguard Worker limit_pressure.half = RA_HALF_SIZE;
2625*61046927SAndroid Build Coastguard Worker limit_pressure.shared = RA_SHARED_SIZE;
2626*61046927SAndroid Build Coastguard Worker limit_pressure.shared_half = RA_SHARED_HALF_SIZE;
2627*61046927SAndroid Build Coastguard Worker
2628*61046927SAndroid Build Coastguard Worker if (gl_shader_stage_is_compute(v->type) && v->has_barrier) {
2629*61046927SAndroid Build Coastguard Worker calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
2630*61046927SAndroid Build Coastguard Worker }
2631*61046927SAndroid Build Coastguard Worker
2632*61046927SAndroid Build Coastguard Worker /* If the user forces a doubled threadsize, we may have to lower the limit
2633*61046927SAndroid Build Coastguard Worker * because on some gens the register file is not big enough to hold a
2634*61046927SAndroid Build Coastguard Worker * double-size wave with all 48 registers in use.
2635*61046927SAndroid Build Coastguard Worker */
2636*61046927SAndroid Build Coastguard Worker if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) {
2637*61046927SAndroid Build Coastguard Worker limit_pressure.full =
2638*61046927SAndroid Build Coastguard Worker MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
2639*61046927SAndroid Build Coastguard Worker }
2640*61046927SAndroid Build Coastguard Worker
2641*61046927SAndroid Build Coastguard Worker /* If requested, lower the limit so that spilling happens more often. */
2642*61046927SAndroid Build Coastguard Worker if (ir3_shader_debug & IR3_DBG_SPILLALL)
2643*61046927SAndroid Build Coastguard Worker calc_min_limit_pressure(v, live, &limit_pressure);
2644*61046927SAndroid Build Coastguard Worker
2645*61046927SAndroid Build Coastguard Worker d("limit pressure:");
2646*61046927SAndroid Build Coastguard Worker d("\tfull: %u", limit_pressure.full);
2647*61046927SAndroid Build Coastguard Worker d("\thalf: %u", limit_pressure.half);
2648*61046927SAndroid Build Coastguard Worker d("\tshared: %u", limit_pressure.shared);
2649*61046927SAndroid Build Coastguard Worker
2650*61046927SAndroid Build Coastguard Worker /* In the worst case, each half register could block one full register, so
2651*61046927SAndroid Build Coastguard Worker * add shared_half in case of fragmentation. In addition, full registers can
2652*61046927SAndroid Build Coastguard Worker * block half registers so we have to consider the total pressure against the
2653*61046927SAndroid Build Coastguard Worker * half limit to prevent live range splitting when we run out of space for
2654*61046927SAndroid Build Coastguard Worker * half registers in the bottom half.
2655*61046927SAndroid Build Coastguard Worker */
2656*61046927SAndroid Build Coastguard Worker if (max_pressure.shared + max_pressure.shared_half > limit_pressure.shared ||
2657*61046927SAndroid Build Coastguard Worker (max_pressure.shared_half > 0 && max_pressure.shared > limit_pressure.shared_half) ||
2658*61046927SAndroid Build Coastguard Worker has_shared_vectors) {
2659*61046927SAndroid Build Coastguard Worker ir3_ra_shared(v, &live);
2660*61046927SAndroid Build Coastguard Worker ir3_calc_pressure(v, live, &max_pressure);
2661*61046927SAndroid Build Coastguard Worker
2662*61046927SAndroid Build Coastguard Worker ir3_debug_print(v->ir, "AFTER: shared register allocation");
2663*61046927SAndroid Build Coastguard Worker }
2664*61046927SAndroid Build Coastguard Worker
2665*61046927SAndroid Build Coastguard Worker bool spilled = false;
2666*61046927SAndroid Build Coastguard Worker if (max_pressure.full > limit_pressure.full ||
2667*61046927SAndroid Build Coastguard Worker max_pressure.half > limit_pressure.half) {
2668*61046927SAndroid Build Coastguard Worker if (!v->compiler->has_pvtmem) {
2669*61046927SAndroid Build Coastguard Worker d("max pressure exceeded!");
2670*61046927SAndroid Build Coastguard Worker goto fail;
2671*61046927SAndroid Build Coastguard Worker }
2672*61046927SAndroid Build Coastguard Worker d("max pressure exceeded, spilling!");
2673*61046927SAndroid Build Coastguard Worker IR3_PASS(v->ir, ir3_spill, v, &live, &limit_pressure);
2674*61046927SAndroid Build Coastguard Worker ir3_calc_pressure(v, live, &max_pressure);
2675*61046927SAndroid Build Coastguard Worker
2676*61046927SAndroid Build Coastguard Worker d("max pressure after spilling:");
2677*61046927SAndroid Build Coastguard Worker d("\tfull: %u", max_pressure.full);
2678*61046927SAndroid Build Coastguard Worker d("\thalf: %u", max_pressure.half);
2679*61046927SAndroid Build Coastguard Worker d("\tshared: %u", max_pressure.shared);
2680*61046927SAndroid Build Coastguard Worker
2681*61046927SAndroid Build Coastguard Worker assert(max_pressure.full <= limit_pressure.full &&
2682*61046927SAndroid Build Coastguard Worker max_pressure.half <= limit_pressure.half);
2683*61046927SAndroid Build Coastguard Worker spilled = true;
2684*61046927SAndroid Build Coastguard Worker }
2685*61046927SAndroid Build Coastguard Worker
2686*61046927SAndroid Build Coastguard Worker ctx->live = live;
2687*61046927SAndroid Build Coastguard Worker ctx->intervals =
2688*61046927SAndroid Build Coastguard Worker rzalloc_array(ctx, struct ra_interval, live->definitions_count);
2689*61046927SAndroid Build Coastguard Worker ctx->blocks = rzalloc_array(ctx, struct ra_block_state, live->block_count);
2690*61046927SAndroid Build Coastguard Worker
2691*61046927SAndroid Build Coastguard Worker ctx->full.size = calc_target_full_pressure(v, max_pressure.full);
2692*61046927SAndroid Build Coastguard Worker d("full size: %u", ctx->full.size);
2693*61046927SAndroid Build Coastguard Worker
2694*61046927SAndroid Build Coastguard Worker if (!v->mergedregs)
2695*61046927SAndroid Build Coastguard Worker ctx->half.size = RA_HALF_SIZE;
2696*61046927SAndroid Build Coastguard Worker
2697*61046927SAndroid Build Coastguard Worker ctx->shared.size = RA_SHARED_SIZE;
2698*61046927SAndroid Build Coastguard Worker
2699*61046927SAndroid Build Coastguard Worker ctx->full.start = ctx->half.start = ctx->shared.start = 0;
2700*61046927SAndroid Build Coastguard Worker
2701*61046927SAndroid Build Coastguard Worker foreach_block (block, &v->ir->block_list)
2702*61046927SAndroid Build Coastguard Worker handle_block(ctx, block);
2703*61046927SAndroid Build Coastguard Worker
2704*61046927SAndroid Build Coastguard Worker ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count, false);
2705*61046927SAndroid Build Coastguard Worker
2706*61046927SAndroid Build Coastguard Worker /* Strip array-ness and SSA-ness at the end, because various helpers still
2707*61046927SAndroid Build Coastguard Worker * need to work even on definitions that have already been assigned. For
2708*61046927SAndroid Build Coastguard Worker * example, we need to preserve array-ness so that array live-ins have the
2709*61046927SAndroid Build Coastguard Worker * right size.
2710*61046927SAndroid Build Coastguard Worker */
2711*61046927SAndroid Build Coastguard Worker foreach_block (block, &v->ir->block_list) {
2712*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
2713*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->dsts_count; i++) {
2714*61046927SAndroid Build Coastguard Worker instr->dsts[i]->flags &= ~IR3_REG_SSA;
2715*61046927SAndroid Build Coastguard Worker
2716*61046927SAndroid Build Coastguard Worker /* Parallel copies of array registers copy the whole register, and
2717*61046927SAndroid Build Coastguard Worker * we need some way to let the parallel copy code know that this was
2718*61046927SAndroid Build Coastguard Worker * an array whose size is determined by reg->size. So keep the array
2719*61046927SAndroid Build Coastguard Worker * flag on those. spill/reload also need to work on the entire
2720*61046927SAndroid Build Coastguard Worker * array.
2721*61046927SAndroid Build Coastguard Worker */
2722*61046927SAndroid Build Coastguard Worker if (!is_meta(instr) && instr->opc != OPC_RELOAD_MACRO)
2723*61046927SAndroid Build Coastguard Worker instr->dsts[i]->flags &= ~IR3_REG_ARRAY;
2724*61046927SAndroid Build Coastguard Worker }
2725*61046927SAndroid Build Coastguard Worker
2726*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->srcs_count; i++) {
2727*61046927SAndroid Build Coastguard Worker instr->srcs[i]->flags &= ~IR3_REG_SSA;
2728*61046927SAndroid Build Coastguard Worker
2729*61046927SAndroid Build Coastguard Worker if (!is_meta(instr) && instr->opc != OPC_SPILL_MACRO)
2730*61046927SAndroid Build Coastguard Worker instr->srcs[i]->flags &= ~IR3_REG_ARRAY;
2731*61046927SAndroid Build Coastguard Worker }
2732*61046927SAndroid Build Coastguard Worker }
2733*61046927SAndroid Build Coastguard Worker }
2734*61046927SAndroid Build Coastguard Worker
2735*61046927SAndroid Build Coastguard Worker ir3_debug_print(v->ir, "AFTER: register allocation");
2736*61046927SAndroid Build Coastguard Worker
2737*61046927SAndroid Build Coastguard Worker if (spilled) {
2738*61046927SAndroid Build Coastguard Worker IR3_PASS(v->ir, ir3_lower_spill);
2739*61046927SAndroid Build Coastguard Worker }
2740*61046927SAndroid Build Coastguard Worker
2741*61046927SAndroid Build Coastguard Worker ir3_lower_copies(v);
2742*61046927SAndroid Build Coastguard Worker
2743*61046927SAndroid Build Coastguard Worker ir3_debug_print(v->ir, "AFTER: ir3_lower_copies");
2744*61046927SAndroid Build Coastguard Worker
2745*61046927SAndroid Build Coastguard Worker ralloc_free(ctx);
2746*61046927SAndroid Build Coastguard Worker
2747*61046927SAndroid Build Coastguard Worker return 0;
2748*61046927SAndroid Build Coastguard Worker fail:
2749*61046927SAndroid Build Coastguard Worker ralloc_free(ctx);
2750*61046927SAndroid Build Coastguard Worker return -1;
2751*61046927SAndroid Build Coastguard Worker }
2752