1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2014 Rob Clark <[email protected]>
3*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker *
5*61046927SAndroid Build Coastguard Worker * Authors:
6*61046927SAndroid Build Coastguard Worker * Rob Clark <[email protected]>
7*61046927SAndroid Build Coastguard Worker */
8*61046927SAndroid Build Coastguard Worker
9*61046927SAndroid Build Coastguard Worker #include "util/ralloc.h"
10*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
11*61046927SAndroid Build Coastguard Worker
12*61046927SAndroid Build Coastguard Worker #include "ir3.h"
13*61046927SAndroid Build Coastguard Worker #include "ir3_shader.h"
14*61046927SAndroid Build Coastguard Worker
/*
 * Legalize:
 *
 * The legalize pass ensures that there are sufficient nops and sync flags
 * for correct execution.
 *
 * 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
 *    based on state flowing out of predecessor blocks until there is
 *    no further change.  In some cases this requires inserting nops.
 * 2) Mark (ei) on last varying input
 * 3) Final nop scheduling for instruction latency
 * 4) Resolve jumps and schedule blocks, marking potential convergence
 *    points with (jp)
 */
29*61046927SAndroid Build Coastguard Worker
30*61046927SAndroid Build Coastguard Worker struct ir3_legalize_ctx {
31*61046927SAndroid Build Coastguard Worker struct ir3_compiler *compiler;
32*61046927SAndroid Build Coastguard Worker struct ir3_shader_variant *so;
33*61046927SAndroid Build Coastguard Worker gl_shader_stage type;
34*61046927SAndroid Build Coastguard Worker int max_bary;
35*61046927SAndroid Build Coastguard Worker bool early_input_release;
36*61046927SAndroid Build Coastguard Worker bool has_inputs;
37*61046927SAndroid Build Coastguard Worker bool has_tex_prefetch;
38*61046927SAndroid Build Coastguard Worker };
39*61046927SAndroid Build Coastguard Worker
40*61046927SAndroid Build Coastguard Worker struct ir3_nop_state {
41*61046927SAndroid Build Coastguard Worker unsigned full_ready[GPR_REG_SIZE];
42*61046927SAndroid Build Coastguard Worker unsigned half_ready[GPR_REG_SIZE];
43*61046927SAndroid Build Coastguard Worker };
44*61046927SAndroid Build Coastguard Worker
45*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state {
46*61046927SAndroid Build Coastguard Worker regmask_t needs_ss;
47*61046927SAndroid Build Coastguard Worker regmask_t needs_ss_scalar_full; /* half scalar ALU producer -> full scalar ALU consumer */
48*61046927SAndroid Build Coastguard Worker regmask_t needs_ss_scalar_half; /* full scalar ALU producer -> half scalar ALU consumer */
49*61046927SAndroid Build Coastguard Worker regmask_t needs_ss_war; /* write after read */
50*61046927SAndroid Build Coastguard Worker regmask_t needs_ss_or_sy_war; /* WAR for sy-producer sources */
51*61046927SAndroid Build Coastguard Worker regmask_t needs_ss_scalar_war; /* scalar ALU write -> ALU write */
52*61046927SAndroid Build Coastguard Worker regmask_t needs_ss_or_sy_scalar_war;
53*61046927SAndroid Build Coastguard Worker regmask_t needs_sy;
54*61046927SAndroid Build Coastguard Worker bool needs_ss_for_const;
55*61046927SAndroid Build Coastguard Worker
56*61046927SAndroid Build Coastguard Worker /* Each of these arrays contains the cycle when the corresponding register
57*61046927SAndroid Build Coastguard Worker * becomes "ready" i.e. does not require any more nops. There is a special
58*61046927SAndroid Build Coastguard Worker * mechanism to let ALU instructions read compatible (i.e. same halfness)
59*61046927SAndroid Build Coastguard Worker * destinations of another ALU instruction with less delay, so this can
60*61046927SAndroid Build Coastguard Worker * depend on what type the consuming instruction is, which is why there are
61*61046927SAndroid Build Coastguard Worker * multiple arrays. The cycle is counted relative to the start of the block.
62*61046927SAndroid Build Coastguard Worker */
63*61046927SAndroid Build Coastguard Worker
64*61046927SAndroid Build Coastguard Worker /* When ALU instructions reading the given full/half register will be ready.
65*61046927SAndroid Build Coastguard Worker */
66*61046927SAndroid Build Coastguard Worker struct ir3_nop_state alu_nop;
67*61046927SAndroid Build Coastguard Worker
68*61046927SAndroid Build Coastguard Worker /* When non-ALU (e.g. cat5) instructions reading the given full/half register
69*61046927SAndroid Build Coastguard Worker * will be ready.
70*61046927SAndroid Build Coastguard Worker */
71*61046927SAndroid Build Coastguard Worker struct ir3_nop_state non_alu_nop;
72*61046927SAndroid Build Coastguard Worker
73*61046927SAndroid Build Coastguard Worker /* When p0.x-w, a0.x, and a1.x are ready. */
74*61046927SAndroid Build Coastguard Worker unsigned pred_ready[4];
75*61046927SAndroid Build Coastguard Worker unsigned addr_ready[2];
76*61046927SAndroid Build Coastguard Worker };
77*61046927SAndroid Build Coastguard Worker
78*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data {
79*61046927SAndroid Build Coastguard Worker bool valid;
80*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state begin_state;
81*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state state;
82*61046927SAndroid Build Coastguard Worker };
83*61046927SAndroid Build Coastguard Worker
84*61046927SAndroid Build Coastguard Worker static inline bool
needs_ss_war(struct ir3_legalize_state * state,struct ir3_register * dst,bool is_scalar_alu)85*61046927SAndroid Build Coastguard Worker needs_ss_war(struct ir3_legalize_state *state, struct ir3_register *dst,
86*61046927SAndroid Build Coastguard Worker bool is_scalar_alu)
87*61046927SAndroid Build Coastguard Worker {
88*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss_war, dst))
89*61046927SAndroid Build Coastguard Worker return true;
90*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss_or_sy_war, dst))
91*61046927SAndroid Build Coastguard Worker return true;
92*61046927SAndroid Build Coastguard Worker
93*61046927SAndroid Build Coastguard Worker if (!is_scalar_alu) {
94*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss_scalar_war, dst))
95*61046927SAndroid Build Coastguard Worker return true;
96*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss_or_sy_scalar_war, dst))
97*61046927SAndroid Build Coastguard Worker return true;
98*61046927SAndroid Build Coastguard Worker }
99*61046927SAndroid Build Coastguard Worker
100*61046927SAndroid Build Coastguard Worker return false;
101*61046927SAndroid Build Coastguard Worker }
102*61046927SAndroid Build Coastguard Worker
103*61046927SAndroid Build Coastguard Worker static inline void
apply_ss(struct ir3_instruction * instr,struct ir3_legalize_state * state,bool mergedregs)104*61046927SAndroid Build Coastguard Worker apply_ss(struct ir3_instruction *instr,
105*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state *state,
106*61046927SAndroid Build Coastguard Worker bool mergedregs)
107*61046927SAndroid Build Coastguard Worker {
108*61046927SAndroid Build Coastguard Worker instr->flags |= IR3_INSTR_SS;
109*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_war, mergedregs);
110*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_or_sy_war, mergedregs);
111*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss, mergedregs);
112*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_scalar_war, mergedregs);
113*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs);
114*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_scalar_full, mergedregs);
115*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_scalar_half, mergedregs);
116*61046927SAndroid Build Coastguard Worker state->needs_ss_for_const = false;
117*61046927SAndroid Build Coastguard Worker }
118*61046927SAndroid Build Coastguard Worker
119*61046927SAndroid Build Coastguard Worker static inline void
apply_sy(struct ir3_instruction * instr,struct ir3_legalize_state * state,bool mergedregs)120*61046927SAndroid Build Coastguard Worker apply_sy(struct ir3_instruction *instr,
121*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state *state,
122*61046927SAndroid Build Coastguard Worker bool mergedregs)
123*61046927SAndroid Build Coastguard Worker {
124*61046927SAndroid Build Coastguard Worker instr->flags |= IR3_INSTR_SY;
125*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_sy, mergedregs);
126*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_or_sy_war, mergedregs);
127*61046927SAndroid Build Coastguard Worker regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs);
128*61046927SAndroid Build Coastguard Worker }
129*61046927SAndroid Build Coastguard Worker
130*61046927SAndroid Build Coastguard Worker static bool
count_instruction(struct ir3_instruction * n,struct ir3_compiler * compiler)131*61046927SAndroid Build Coastguard Worker count_instruction(struct ir3_instruction *n, struct ir3_compiler *compiler)
132*61046927SAndroid Build Coastguard Worker {
133*61046927SAndroid Build Coastguard Worker /* NOTE: don't count branch/jump since we don't know yet if they will
134*61046927SAndroid Build Coastguard Worker * be eliminated later in resolve_jumps().. really should do that
135*61046927SAndroid Build Coastguard Worker * earlier so we don't have this constraint.
136*61046927SAndroid Build Coastguard Worker */
137*61046927SAndroid Build Coastguard Worker return (is_alu(n) && !is_scalar_alu(n, compiler)) ||
138*61046927SAndroid Build Coastguard Worker (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR) &&
139*61046927SAndroid Build Coastguard Worker (n->opc != OPC_BRAA) && (n->opc != OPC_BRAO));
140*61046927SAndroid Build Coastguard Worker }
141*61046927SAndroid Build Coastguard Worker
142*61046927SAndroid Build Coastguard Worker static unsigned *
get_ready_slot(struct ir3_legalize_state * state,struct ir3_register * reg,unsigned num,bool consumer_alu,bool matching_size)143*61046927SAndroid Build Coastguard Worker get_ready_slot(struct ir3_legalize_state *state,
144*61046927SAndroid Build Coastguard Worker struct ir3_register *reg, unsigned num,
145*61046927SAndroid Build Coastguard Worker bool consumer_alu, bool matching_size)
146*61046927SAndroid Build Coastguard Worker {
147*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_PREDICATE) {
148*61046927SAndroid Build Coastguard Worker assert(num == reg->num);
149*61046927SAndroid Build Coastguard Worker assert(reg_num(reg) == REG_P0);
150*61046927SAndroid Build Coastguard Worker return &state->pred_ready[reg_comp(reg)];
151*61046927SAndroid Build Coastguard Worker }
152*61046927SAndroid Build Coastguard Worker if (reg->num == regid(REG_A0, 0))
153*61046927SAndroid Build Coastguard Worker return &state->addr_ready[0];
154*61046927SAndroid Build Coastguard Worker if (reg->num == regid(REG_A0, 1))
155*61046927SAndroid Build Coastguard Worker return &state->addr_ready[1];
156*61046927SAndroid Build Coastguard Worker struct ir3_nop_state *nop =
157*61046927SAndroid Build Coastguard Worker consumer_alu ? &state->alu_nop : &state->non_alu_nop;
158*61046927SAndroid Build Coastguard Worker assert(!(reg->flags & IR3_REG_SHARED));
159*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_HALF) {
160*61046927SAndroid Build Coastguard Worker if (matching_size)
161*61046927SAndroid Build Coastguard Worker return &nop->half_ready[num];
162*61046927SAndroid Build Coastguard Worker else
163*61046927SAndroid Build Coastguard Worker return &nop->full_ready[num / 2];
164*61046927SAndroid Build Coastguard Worker } else {
165*61046927SAndroid Build Coastguard Worker if (matching_size)
166*61046927SAndroid Build Coastguard Worker return &nop->full_ready[num];
167*61046927SAndroid Build Coastguard Worker /* If "num" is large enough, then it can't alias a half-reg because only
168*61046927SAndroid Build Coastguard Worker * the first half of the full reg speace aliases half regs. Return NULL in
169*61046927SAndroid Build Coastguard Worker * this case.
170*61046927SAndroid Build Coastguard Worker */
171*61046927SAndroid Build Coastguard Worker else if (num * 2 < ARRAY_SIZE(nop->half_ready))
172*61046927SAndroid Build Coastguard Worker return &nop->half_ready[num * 2];
173*61046927SAndroid Build Coastguard Worker else
174*61046927SAndroid Build Coastguard Worker return NULL;
175*61046927SAndroid Build Coastguard Worker }
176*61046927SAndroid Build Coastguard Worker }
177*61046927SAndroid Build Coastguard Worker
178*61046927SAndroid Build Coastguard Worker static unsigned
delay_calc(struct ir3_legalize_state * state,struct ir3_instruction * instr,unsigned cycle)179*61046927SAndroid Build Coastguard Worker delay_calc(struct ir3_legalize_state *state,
180*61046927SAndroid Build Coastguard Worker struct ir3_instruction *instr,
181*61046927SAndroid Build Coastguard Worker unsigned cycle)
182*61046927SAndroid Build Coastguard Worker {
183*61046927SAndroid Build Coastguard Worker /* As far as we know, shader outputs don't need any delay. */
184*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_END || instr->opc == OPC_CHMASK)
185*61046927SAndroid Build Coastguard Worker return 0;
186*61046927SAndroid Build Coastguard Worker
187*61046927SAndroid Build Coastguard Worker unsigned delay = 0;
188*61046927SAndroid Build Coastguard Worker foreach_src_n (src, n, instr) {
189*61046927SAndroid Build Coastguard Worker if (src->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SHARED))
190*61046927SAndroid Build Coastguard Worker continue;
191*61046927SAndroid Build Coastguard Worker
192*61046927SAndroid Build Coastguard Worker unsigned elems = post_ra_reg_elems(src);
193*61046927SAndroid Build Coastguard Worker unsigned num = post_ra_reg_num(src);
194*61046927SAndroid Build Coastguard Worker unsigned src_cycle = cycle;
195*61046927SAndroid Build Coastguard Worker
196*61046927SAndroid Build Coastguard Worker /* gat and swz have scalar sources and each source is read in a
197*61046927SAndroid Build Coastguard Worker * subsequent cycle.
198*61046927SAndroid Build Coastguard Worker */
199*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_GAT || instr->opc == OPC_SWZ)
200*61046927SAndroid Build Coastguard Worker src_cycle += n;
201*61046927SAndroid Build Coastguard Worker
202*61046927SAndroid Build Coastguard Worker /* cat3 instructions consume their last source two cycles later, so they
203*61046927SAndroid Build Coastguard Worker * only need a delay of 1.
204*61046927SAndroid Build Coastguard Worker */
205*61046927SAndroid Build Coastguard Worker if ((is_mad(instr->opc) || is_madsh(instr->opc)) && n == 2)
206*61046927SAndroid Build Coastguard Worker src_cycle += 2;
207*61046927SAndroid Build Coastguard Worker
208*61046927SAndroid Build Coastguard Worker for (unsigned elem = 0; elem < elems; elem++, num++) {
209*61046927SAndroid Build Coastguard Worker unsigned ready_cycle =
210*61046927SAndroid Build Coastguard Worker *get_ready_slot(state, src, num, is_alu(instr), true);
211*61046927SAndroid Build Coastguard Worker delay = MAX2(delay, MAX2(ready_cycle, src_cycle) - src_cycle);
212*61046927SAndroid Build Coastguard Worker
213*61046927SAndroid Build Coastguard Worker /* Increment cycle for ALU instructions with (rptN) where sources are
214*61046927SAndroid Build Coastguard Worker * read each subsequent cycle.
215*61046927SAndroid Build Coastguard Worker */
216*61046927SAndroid Build Coastguard Worker if (instr->repeat && !(src->flags & IR3_REG_RELATIV))
217*61046927SAndroid Build Coastguard Worker src_cycle++;
218*61046927SAndroid Build Coastguard Worker }
219*61046927SAndroid Build Coastguard Worker }
220*61046927SAndroid Build Coastguard Worker
221*61046927SAndroid Build Coastguard Worker return delay;
222*61046927SAndroid Build Coastguard Worker }
223*61046927SAndroid Build Coastguard Worker
224*61046927SAndroid Build Coastguard Worker static void
delay_update(struct ir3_legalize_state * state,struct ir3_instruction * instr,unsigned cycle,bool mergedregs)225*61046927SAndroid Build Coastguard Worker delay_update(struct ir3_legalize_state *state,
226*61046927SAndroid Build Coastguard Worker struct ir3_instruction *instr,
227*61046927SAndroid Build Coastguard Worker unsigned cycle,
228*61046927SAndroid Build Coastguard Worker bool mergedregs)
229*61046927SAndroid Build Coastguard Worker {
230*61046927SAndroid Build Coastguard Worker if (writes_addr1(instr) && instr->block->in_early_preamble)
231*61046927SAndroid Build Coastguard Worker return;
232*61046927SAndroid Build Coastguard Worker
233*61046927SAndroid Build Coastguard Worker foreach_dst_n (dst, n, instr) {
234*61046927SAndroid Build Coastguard Worker unsigned elems = post_ra_reg_elems(dst);
235*61046927SAndroid Build Coastguard Worker unsigned num = post_ra_reg_num(dst);
236*61046927SAndroid Build Coastguard Worker unsigned dst_cycle = cycle;
237*61046927SAndroid Build Coastguard Worker
238*61046927SAndroid Build Coastguard Worker /* sct and swz have scalar destinations and each destination is written in
239*61046927SAndroid Build Coastguard Worker * a subsequent cycle.
240*61046927SAndroid Build Coastguard Worker */
241*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_SCT || instr->opc == OPC_SWZ)
242*61046927SAndroid Build Coastguard Worker dst_cycle += n;
243*61046927SAndroid Build Coastguard Worker
244*61046927SAndroid Build Coastguard Worker /* For relative accesses with (rptN), we have no way of knowing which
245*61046927SAndroid Build Coastguard Worker * component is accessed when, so we have to assume the worst and mark
246*61046927SAndroid Build Coastguard Worker * every array member as being written at the end.
247*61046927SAndroid Build Coastguard Worker */
248*61046927SAndroid Build Coastguard Worker if (dst->flags & IR3_REG_RELATIV)
249*61046927SAndroid Build Coastguard Worker dst_cycle += instr->repeat;
250*61046927SAndroid Build Coastguard Worker
251*61046927SAndroid Build Coastguard Worker if (dst->flags & IR3_REG_SHARED)
252*61046927SAndroid Build Coastguard Worker continue;
253*61046927SAndroid Build Coastguard Worker
254*61046927SAndroid Build Coastguard Worker for (unsigned elem = 0; elem < elems; elem++, num++) {
255*61046927SAndroid Build Coastguard Worker for (unsigned consumer_alu = 0; consumer_alu < 2; consumer_alu++) {
256*61046927SAndroid Build Coastguard Worker for (unsigned matching_size = 0; matching_size < 2; matching_size++) {
257*61046927SAndroid Build Coastguard Worker unsigned *ready_slot =
258*61046927SAndroid Build Coastguard Worker get_ready_slot(state, dst, num, consumer_alu, matching_size);
259*61046927SAndroid Build Coastguard Worker
260*61046927SAndroid Build Coastguard Worker if (!ready_slot)
261*61046927SAndroid Build Coastguard Worker continue;
262*61046927SAndroid Build Coastguard Worker
263*61046927SAndroid Build Coastguard Worker bool reset_ready_slot = false;
264*61046927SAndroid Build Coastguard Worker unsigned delay = 0;
265*61046927SAndroid Build Coastguard Worker if (!is_alu(instr)) {
266*61046927SAndroid Build Coastguard Worker /* Apparently writes that require (ss) or (sy) are
267*61046927SAndroid Build Coastguard Worker * synchronized against previous writes, so consumers don't
268*61046927SAndroid Build Coastguard Worker * have to wait for any previous overlapping ALU instructions
269*61046927SAndroid Build Coastguard Worker * to complete.
270*61046927SAndroid Build Coastguard Worker */
271*61046927SAndroid Build Coastguard Worker reset_ready_slot = true;
272*61046927SAndroid Build Coastguard Worker } else if ((dst->flags & IR3_REG_PREDICATE) ||
273*61046927SAndroid Build Coastguard Worker reg_num(dst) == REG_A0) {
274*61046927SAndroid Build Coastguard Worker delay = 6;
275*61046927SAndroid Build Coastguard Worker if (!matching_size)
276*61046927SAndroid Build Coastguard Worker continue;
277*61046927SAndroid Build Coastguard Worker } else {
278*61046927SAndroid Build Coastguard Worker delay = (consumer_alu && matching_size) ? 3 : 6;
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker
281*61046927SAndroid Build Coastguard Worker if (!matching_size) {
282*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < reg_elem_size(dst); i++) {
283*61046927SAndroid Build Coastguard Worker ready_slot[i] =
284*61046927SAndroid Build Coastguard Worker reset_ready_slot ? 0 :
285*61046927SAndroid Build Coastguard Worker MAX2(ready_slot[i], dst_cycle + delay);
286*61046927SAndroid Build Coastguard Worker }
287*61046927SAndroid Build Coastguard Worker } else {
288*61046927SAndroid Build Coastguard Worker *ready_slot =
289*61046927SAndroid Build Coastguard Worker reset_ready_slot ? 0 :
290*61046927SAndroid Build Coastguard Worker MAX2(*ready_slot, dst_cycle + delay);
291*61046927SAndroid Build Coastguard Worker }
292*61046927SAndroid Build Coastguard Worker }
293*61046927SAndroid Build Coastguard Worker }
294*61046927SAndroid Build Coastguard Worker
295*61046927SAndroid Build Coastguard Worker /* Increment cycle for ALU instructions with (rptN) where destinations
296*61046927SAndroid Build Coastguard Worker * are written each subsequent cycle.
297*61046927SAndroid Build Coastguard Worker */
298*61046927SAndroid Build Coastguard Worker if (instr->repeat && !(dst->flags & IR3_REG_RELATIV))
299*61046927SAndroid Build Coastguard Worker dst_cycle++;
300*61046927SAndroid Build Coastguard Worker }
301*61046927SAndroid Build Coastguard Worker }
302*61046927SAndroid Build Coastguard Worker }
303*61046927SAndroid Build Coastguard Worker
/* We want to evaluate each block from the position of each of its
 * predecessor blocks, so that the flags set are the union over all
 * possible program paths.
 *
 * To do this, we need to know the output state (needs_ss/ss_war/sy)
 * of all predecessor blocks.  The tricky thing is loops, which mean
 * that we can't simply recursively process each predecessor block
 * before legalizing the current block.
 *
 * We handle that by looping over all the blocks until the results
 * converge.  If the output state of a given block changes in a given
 * pass, this means that all of its successor blocks are not yet fully
 * legalized.
 */
318*61046927SAndroid Build Coastguard Worker
319*61046927SAndroid Build Coastguard Worker static bool
legalize_block(struct ir3_legalize_ctx * ctx,struct ir3_block * block)320*61046927SAndroid Build Coastguard Worker legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
321*61046927SAndroid Build Coastguard Worker {
322*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data *bd = block->data;
323*61046927SAndroid Build Coastguard Worker
324*61046927SAndroid Build Coastguard Worker if (bd->valid)
325*61046927SAndroid Build Coastguard Worker return false;
326*61046927SAndroid Build Coastguard Worker
327*61046927SAndroid Build Coastguard Worker struct ir3_instruction *last_n = NULL;
328*61046927SAndroid Build Coastguard Worker struct list_head instr_list;
329*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state prev_state = bd->state;
330*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state *state = &bd->begin_state;
331*61046927SAndroid Build Coastguard Worker bool last_input_needs_ss = false;
332*61046927SAndroid Build Coastguard Worker bool mergedregs = ctx->so->mergedregs;
333*61046927SAndroid Build Coastguard Worker
   /* Our input state is the OR of all predecessor blocks' state.
    *
    * Why don't we just zero the state at the beginning before merging in the
    * predecessors? Because otherwise updates may not be a "lattice refinement",
    * i.e. needs_ss may go from true to false for some register due to a (ss) we
    * inserted the second time around (and the same for (sy)). This means that
    * there's no solid guarantee the algorithm will converge, and in theory
    * there may be infinite loops where we fight over the placement of an (ss).
    */
343*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < block->predecessors_count; i++) {
344*61046927SAndroid Build Coastguard Worker struct ir3_block *predecessor = block->predecessors[i];
345*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data *pbd = predecessor->data;
346*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state *pstate = &pbd->state;
347*61046927SAndroid Build Coastguard Worker
348*61046927SAndroid Build Coastguard Worker /* Our input (ss)/(sy) state is based on OR'ing the output
349*61046927SAndroid Build Coastguard Worker * state of all our predecessor blocks
350*61046927SAndroid Build Coastguard Worker */
351*61046927SAndroid Build Coastguard Worker regmask_or(&state->needs_ss, &state->needs_ss, &pstate->needs_ss);
352*61046927SAndroid Build Coastguard Worker regmask_or(&state->needs_ss_war, &state->needs_ss_war,
353*61046927SAndroid Build Coastguard Worker &pstate->needs_ss_war);
354*61046927SAndroid Build Coastguard Worker regmask_or(&state->needs_ss_or_sy_war, &state->needs_ss_or_sy_war,
355*61046927SAndroid Build Coastguard Worker &pstate->needs_ss_or_sy_war);
356*61046927SAndroid Build Coastguard Worker regmask_or(&state->needs_sy, &state->needs_sy, &pstate->needs_sy);
357*61046927SAndroid Build Coastguard Worker state->needs_ss_for_const |= pstate->needs_ss_for_const;
358*61046927SAndroid Build Coastguard Worker
359*61046927SAndroid Build Coastguard Worker /* Our nop state is the max of the predecessor blocks */
360*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++)
361*61046927SAndroid Build Coastguard Worker state->pred_ready[i] = MAX2(state->pred_ready[i],
362*61046927SAndroid Build Coastguard Worker pstate->pred_ready[i]);
363*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) {
364*61046927SAndroid Build Coastguard Worker state->alu_nop.full_ready[i] = MAX2(state->alu_nop.full_ready[i],
365*61046927SAndroid Build Coastguard Worker pstate->alu_nop.full_ready[i]);
366*61046927SAndroid Build Coastguard Worker state->alu_nop.half_ready[i] = MAX2(state->alu_nop.half_ready[i],
367*61046927SAndroid Build Coastguard Worker pstate->alu_nop.half_ready[i]);
368*61046927SAndroid Build Coastguard Worker state->non_alu_nop.full_ready[i] = MAX2(state->non_alu_nop.full_ready[i],
369*61046927SAndroid Build Coastguard Worker pstate->non_alu_nop.full_ready[i]);
370*61046927SAndroid Build Coastguard Worker state->non_alu_nop.half_ready[i] = MAX2(state->non_alu_nop.half_ready[i],
371*61046927SAndroid Build Coastguard Worker pstate->non_alu_nop.half_ready[i]);
372*61046927SAndroid Build Coastguard Worker }
373*61046927SAndroid Build Coastguard Worker }
374*61046927SAndroid Build Coastguard Worker
   /* We need to take physical-only edges into account when tracking shared
    * registers.
    */
378*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
379*61046927SAndroid Build Coastguard Worker struct ir3_block *predecessor = block->physical_predecessors[i];
380*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data *pbd = predecessor->data;
381*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state *pstate = &pbd->state;
382*61046927SAndroid Build Coastguard Worker
383*61046927SAndroid Build Coastguard Worker regmask_or_shared(&state->needs_ss, &state->needs_ss, &pstate->needs_ss);
384*61046927SAndroid Build Coastguard Worker regmask_or_shared(&state->needs_ss_scalar_full,
385*61046927SAndroid Build Coastguard Worker &state->needs_ss_scalar_full,
386*61046927SAndroid Build Coastguard Worker &pstate->needs_ss_scalar_full);
387*61046927SAndroid Build Coastguard Worker regmask_or_shared(&state->needs_ss_scalar_half,
388*61046927SAndroid Build Coastguard Worker &state->needs_ss_scalar_half,
389*61046927SAndroid Build Coastguard Worker &pstate->needs_ss_scalar_half);
390*61046927SAndroid Build Coastguard Worker regmask_or_shared(&state->needs_ss_scalar_war, &state->needs_ss_scalar_war,
391*61046927SAndroid Build Coastguard Worker &pstate->needs_ss_scalar_war);
392*61046927SAndroid Build Coastguard Worker regmask_or_shared(&state->needs_ss_or_sy_scalar_war,
393*61046927SAndroid Build Coastguard Worker &state->needs_ss_or_sy_scalar_war,
394*61046927SAndroid Build Coastguard Worker &pstate->needs_ss_or_sy_scalar_war);
395*61046927SAndroid Build Coastguard Worker }
396*61046927SAndroid Build Coastguard Worker
397*61046927SAndroid Build Coastguard Worker memcpy(&bd->state, state, sizeof(*state));
398*61046927SAndroid Build Coastguard Worker state = &bd->state;
399*61046927SAndroid Build Coastguard Worker
400*61046927SAndroid Build Coastguard Worker unsigned input_count = 0;
401*61046927SAndroid Build Coastguard Worker
402*61046927SAndroid Build Coastguard Worker foreach_instr (n, &block->instr_list) {
403*61046927SAndroid Build Coastguard Worker if (is_input(n)) {
404*61046927SAndroid Build Coastguard Worker input_count++;
405*61046927SAndroid Build Coastguard Worker }
406*61046927SAndroid Build Coastguard Worker }
407*61046927SAndroid Build Coastguard Worker
408*61046927SAndroid Build Coastguard Worker unsigned inputs_remaining = input_count;
409*61046927SAndroid Build Coastguard Worker
410*61046927SAndroid Build Coastguard Worker /* Either inputs are in the first block or we expect inputs to be released
411*61046927SAndroid Build Coastguard Worker * with the end of the program.
412*61046927SAndroid Build Coastguard Worker */
413*61046927SAndroid Build Coastguard Worker assert(input_count == 0 || !ctx->early_input_release ||
414*61046927SAndroid Build Coastguard Worker block == ir3_after_preamble(block->shader));
415*61046927SAndroid Build Coastguard Worker
416*61046927SAndroid Build Coastguard Worker /* remove all the instructions from the list, we'll be adding
417*61046927SAndroid Build Coastguard Worker * them back in as we go
418*61046927SAndroid Build Coastguard Worker */
419*61046927SAndroid Build Coastguard Worker list_replace(&block->instr_list, &instr_list);
420*61046927SAndroid Build Coastguard Worker list_inithead(&block->instr_list);
421*61046927SAndroid Build Coastguard Worker
422*61046927SAndroid Build Coastguard Worker unsigned cycle = 0;
423*61046927SAndroid Build Coastguard Worker
424*61046927SAndroid Build Coastguard Worker foreach_instr_safe (n, &instr_list) {
425*61046927SAndroid Build Coastguard Worker unsigned i;
426*61046927SAndroid Build Coastguard Worker
427*61046927SAndroid Build Coastguard Worker n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY);
428*61046927SAndroid Build Coastguard Worker
429*61046927SAndroid Build Coastguard Worker /* _meta::tex_prefetch instructions removed later in
430*61046927SAndroid Build Coastguard Worker * collect_tex_prefetches()
431*61046927SAndroid Build Coastguard Worker */
432*61046927SAndroid Build Coastguard Worker if (is_meta(n) && (n->opc != OPC_META_TEX_PREFETCH))
433*61046927SAndroid Build Coastguard Worker continue;
434*61046927SAndroid Build Coastguard Worker
435*61046927SAndroid Build Coastguard Worker if (is_input(n)) {
436*61046927SAndroid Build Coastguard Worker struct ir3_register *inloc = n->srcs[0];
437*61046927SAndroid Build Coastguard Worker assert(inloc->flags & IR3_REG_IMMED);
438*61046927SAndroid Build Coastguard Worker
439*61046927SAndroid Build Coastguard Worker int last_inloc =
440*61046927SAndroid Build Coastguard Worker inloc->iim_val + ((inloc->flags & IR3_REG_R) ? n->repeat : 0);
441*61046927SAndroid Build Coastguard Worker ctx->max_bary = MAX2(ctx->max_bary, last_inloc);
442*61046927SAndroid Build Coastguard Worker }
443*61046927SAndroid Build Coastguard Worker
444*61046927SAndroid Build Coastguard Worker if ((last_n && is_barrier(last_n)) || n->opc == OPC_SHPE) {
445*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
446*61046927SAndroid Build Coastguard Worker apply_sy(n, state, mergedregs);
447*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
448*61046927SAndroid Build Coastguard Worker }
449*61046927SAndroid Build Coastguard Worker
450*61046927SAndroid Build Coastguard Worker if (last_n && (last_n->opc == OPC_PREDT)) {
451*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
452*61046927SAndroid Build Coastguard Worker }
453*61046927SAndroid Build Coastguard Worker
454*61046927SAndroid Build Coastguard Worker bool n_is_scalar_alu = is_scalar_alu(n, ctx->compiler);
455*61046927SAndroid Build Coastguard Worker
456*61046927SAndroid Build Coastguard Worker /* NOTE: consider dst register too.. it could happen that
457*61046927SAndroid Build Coastguard Worker * texture sample instruction (for example) writes some
458*61046927SAndroid Build Coastguard Worker * components which are unused. A subsequent instruction
459*61046927SAndroid Build Coastguard Worker * that writes the same register can race w/ the sam instr
460*61046927SAndroid Build Coastguard Worker * resulting in undefined results:
461*61046927SAndroid Build Coastguard Worker */
462*61046927SAndroid Build Coastguard Worker for (i = 0; i < n->dsts_count + n->srcs_count; i++) {
463*61046927SAndroid Build Coastguard Worker struct ir3_register *reg;
464*61046927SAndroid Build Coastguard Worker if (i < n->dsts_count)
465*61046927SAndroid Build Coastguard Worker reg = n->dsts[i];
466*61046927SAndroid Build Coastguard Worker else
467*61046927SAndroid Build Coastguard Worker reg = n->srcs[i - n->dsts_count];
468*61046927SAndroid Build Coastguard Worker
469*61046927SAndroid Build Coastguard Worker if (reg_gpr(reg)) {
470*61046927SAndroid Build Coastguard Worker
471*61046927SAndroid Build Coastguard Worker /* TODO: we probably only need (ss) for alu
472*61046927SAndroid Build Coastguard Worker * instr consuming sfu result.. need to make
473*61046927SAndroid Build Coastguard Worker * some tests for both this and (sy)..
474*61046927SAndroid Build Coastguard Worker */
475*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss, reg)) {
476*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
477*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
478*61046927SAndroid Build Coastguard Worker }
479*61046927SAndroid Build Coastguard Worker
480*61046927SAndroid Build Coastguard Worker /* There is a fast feedback path for scalar ALU instructions which
481*61046927SAndroid Build Coastguard Worker * only takes 1 cycle of latency, similar to the normal 3 cycle
482*61046927SAndroid Build Coastguard Worker * latency path for ALU instructions. For this fast path the
483*61046927SAndroid Build Coastguard Worker * producer and consumer must use the same register size (i.e. no
484*61046927SAndroid Build Coastguard Worker * writing a full register and then reading half of it or vice
485*61046927SAndroid Build Coastguard Worker * versa). If we don't hit this path, either because of a mismatched
486*61046927SAndroid Build Coastguard Worker * size or a read via the regular ALU, then the write latency is
487*61046927SAndroid Build Coastguard Worker * variable and we must use (ss) to wait for the scalar ALU. This is
488*61046927SAndroid Build Coastguard Worker * different from the fixed 6 cycle latency for mismatched vector
489*61046927SAndroid Build Coastguard Worker * ALU accesses.
490*61046927SAndroid Build Coastguard Worker */
491*61046927SAndroid Build Coastguard Worker if (n_is_scalar_alu) {
492*61046927SAndroid Build Coastguard Worker /* Check if we have a mismatched size RaW dependency */
493*61046927SAndroid Build Coastguard Worker if (regmask_get((reg->flags & IR3_REG_HALF) ?
494*61046927SAndroid Build Coastguard Worker &state->needs_ss_scalar_half :
495*61046927SAndroid Build Coastguard Worker &state->needs_ss_scalar_full, reg)) {
496*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
497*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
498*61046927SAndroid Build Coastguard Worker }
499*61046927SAndroid Build Coastguard Worker } else {
500*61046927SAndroid Build Coastguard Worker /* check if we have a scalar -> vector RaW dependency */
501*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss_scalar_half, reg) ||
502*61046927SAndroid Build Coastguard Worker regmask_get(&state->needs_ss_scalar_full, reg)) {
503*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
504*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
505*61046927SAndroid Build Coastguard Worker }
506*61046927SAndroid Build Coastguard Worker }
507*61046927SAndroid Build Coastguard Worker
508*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_sy, reg)) {
509*61046927SAndroid Build Coastguard Worker apply_sy(n, state, mergedregs);
510*61046927SAndroid Build Coastguard Worker }
511*61046927SAndroid Build Coastguard Worker } else if ((reg->flags & IR3_REG_CONST)) {
512*61046927SAndroid Build Coastguard Worker if (state->needs_ss_for_const) {
513*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
514*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
515*61046927SAndroid Build Coastguard Worker }
516*61046927SAndroid Build Coastguard Worker } else if (reg_is_addr1(reg) && block->in_early_preamble) {
517*61046927SAndroid Build Coastguard Worker if (regmask_get(&state->needs_ss, reg)) {
518*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
519*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
520*61046927SAndroid Build Coastguard Worker }
521*61046927SAndroid Build Coastguard Worker }
522*61046927SAndroid Build Coastguard Worker }
523*61046927SAndroid Build Coastguard Worker
524*61046927SAndroid Build Coastguard Worker foreach_dst (reg, n) {
525*61046927SAndroid Build Coastguard Worker if (needs_ss_war(state, reg, n_is_scalar_alu)) {
526*61046927SAndroid Build Coastguard Worker apply_ss(n, state, mergedregs);
527*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
528*61046927SAndroid Build Coastguard Worker }
529*61046927SAndroid Build Coastguard Worker }
530*61046927SAndroid Build Coastguard Worker
      /* I'm not exactly sure what this is for, but it seems we need this on
       * every mova1 in early preambles.
       */
534*61046927SAndroid Build Coastguard Worker if (writes_addr1(n) && block->in_early_preamble)
535*61046927SAndroid Build Coastguard Worker n->srcs[0]->flags |= IR3_REG_R;
536*61046927SAndroid Build Coastguard Worker
537*61046927SAndroid Build Coastguard Worker /* cat5+ does not have an (ss) bit, if needed we need to
538*61046927SAndroid Build Coastguard Worker * insert a nop to carry the sync flag. Would be kinda
539*61046927SAndroid Build Coastguard Worker * clever if we were aware of this during scheduling, but
540*61046927SAndroid Build Coastguard Worker * this should be a pretty rare case:
541*61046927SAndroid Build Coastguard Worker */
542*61046927SAndroid Build Coastguard Worker if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) {
543*61046927SAndroid Build Coastguard Worker struct ir3_instruction *nop;
544*61046927SAndroid Build Coastguard Worker nop = ir3_NOP(block);
545*61046927SAndroid Build Coastguard Worker nop->flags |= IR3_INSTR_SS;
546*61046927SAndroid Build Coastguard Worker n->flags &= ~IR3_INSTR_SS;
547*61046927SAndroid Build Coastguard Worker last_n = nop;
548*61046927SAndroid Build Coastguard Worker cycle++;
549*61046927SAndroid Build Coastguard Worker }
550*61046927SAndroid Build Coastguard Worker
551*61046927SAndroid Build Coastguard Worker unsigned delay = delay_calc(state, n, cycle);
552*61046927SAndroid Build Coastguard Worker
553*61046927SAndroid Build Coastguard Worker /* NOTE: I think the nopN encoding works for a5xx and
554*61046927SAndroid Build Coastguard Worker * probably a4xx, but not a3xx. So far only tested on
555*61046927SAndroid Build Coastguard Worker * a6xx.
556*61046927SAndroid Build Coastguard Worker */
557*61046927SAndroid Build Coastguard Worker
558*61046927SAndroid Build Coastguard Worker if ((delay > 0) && (ctx->compiler->gen >= 6) && last_n &&
559*61046927SAndroid Build Coastguard Worker !n_is_scalar_alu &&
560*61046927SAndroid Build Coastguard Worker ((opc_cat(last_n->opc) == 2) || (opc_cat(last_n->opc) == 3)) &&
561*61046927SAndroid Build Coastguard Worker (last_n->repeat == 0)) {
562*61046927SAndroid Build Coastguard Worker /* the previous cat2/cat3 instruction can encode at most 3 nop's: */
563*61046927SAndroid Build Coastguard Worker unsigned transfer = MIN2(delay, 3 - last_n->nop);
564*61046927SAndroid Build Coastguard Worker last_n->nop += transfer;
565*61046927SAndroid Build Coastguard Worker delay -= transfer;
566*61046927SAndroid Build Coastguard Worker cycle += transfer;
567*61046927SAndroid Build Coastguard Worker }
568*61046927SAndroid Build Coastguard Worker
569*61046927SAndroid Build Coastguard Worker if ((delay > 0) && last_n && (last_n->opc == OPC_NOP)) {
570*61046927SAndroid Build Coastguard Worker /* the previous nop can encode at most 5 repeats: */
571*61046927SAndroid Build Coastguard Worker unsigned transfer = MIN2(delay, 5 - last_n->repeat);
572*61046927SAndroid Build Coastguard Worker last_n->repeat += transfer;
573*61046927SAndroid Build Coastguard Worker delay -= transfer;
574*61046927SAndroid Build Coastguard Worker cycle += transfer;
575*61046927SAndroid Build Coastguard Worker }
576*61046927SAndroid Build Coastguard Worker
577*61046927SAndroid Build Coastguard Worker if (delay > 0) {
578*61046927SAndroid Build Coastguard Worker assert(delay <= 6);
579*61046927SAndroid Build Coastguard Worker ir3_NOP(block)->repeat = delay - 1;
580*61046927SAndroid Build Coastguard Worker cycle += delay;
581*61046927SAndroid Build Coastguard Worker }
582*61046927SAndroid Build Coastguard Worker
583*61046927SAndroid Build Coastguard Worker if (ctx->compiler->samgq_workaround &&
584*61046927SAndroid Build Coastguard Worker ctx->type != MESA_SHADER_FRAGMENT &&
585*61046927SAndroid Build Coastguard Worker ctx->type != MESA_SHADER_COMPUTE && n->opc == OPC_SAMGQ) {
586*61046927SAndroid Build Coastguard Worker struct ir3_instruction *samgp;
587*61046927SAndroid Build Coastguard Worker
588*61046927SAndroid Build Coastguard Worker list_delinit(&n->node);
589*61046927SAndroid Build Coastguard Worker
590*61046927SAndroid Build Coastguard Worker for (i = 0; i < 4; i++) {
591*61046927SAndroid Build Coastguard Worker samgp = ir3_instr_clone(n);
592*61046927SAndroid Build Coastguard Worker samgp->opc = OPC_SAMGP0 + i;
593*61046927SAndroid Build Coastguard Worker if (i > 1)
594*61046927SAndroid Build Coastguard Worker samgp->flags |= IR3_INSTR_SY;
595*61046927SAndroid Build Coastguard Worker }
596*61046927SAndroid Build Coastguard Worker } else {
597*61046927SAndroid Build Coastguard Worker list_delinit(&n->node);
598*61046927SAndroid Build Coastguard Worker list_addtail(&n->node, &block->instr_list);
599*61046927SAndroid Build Coastguard Worker }
600*61046927SAndroid Build Coastguard Worker
601*61046927SAndroid Build Coastguard Worker if (is_sfu(n))
602*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss, n->dsts[0]);
603*61046927SAndroid Build Coastguard Worker
604*61046927SAndroid Build Coastguard Worker foreach_dst (dst, n) {
605*61046927SAndroid Build Coastguard Worker if (dst->flags & IR3_REG_SHARED) {
606*61046927SAndroid Build Coastguard Worker if (n_is_scalar_alu) {
607*61046927SAndroid Build Coastguard Worker if (dst->flags & IR3_REG_HALF)
608*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss_scalar_full, dst);
609*61046927SAndroid Build Coastguard Worker else
610*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss_scalar_half, dst);
611*61046927SAndroid Build Coastguard Worker } else {
612*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss, dst);
613*61046927SAndroid Build Coastguard Worker }
614*61046927SAndroid Build Coastguard Worker } else if (reg_is_addr1(dst) && block->in_early_preamble) {
615*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss, dst);
616*61046927SAndroid Build Coastguard Worker }
617*61046927SAndroid Build Coastguard Worker }
618*61046927SAndroid Build Coastguard Worker
619*61046927SAndroid Build Coastguard Worker if (is_tex_or_prefetch(n) && n->dsts_count > 0) {
620*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_sy, n->dsts[0]);
621*61046927SAndroid Build Coastguard Worker if (n->opc == OPC_META_TEX_PREFETCH)
622*61046927SAndroid Build Coastguard Worker ctx->has_tex_prefetch = true;
623*61046927SAndroid Build Coastguard Worker } else if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
624*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss, n->dsts[0]);
625*61046927SAndroid Build Coastguard Worker ir3_NOP(block)->flags |= IR3_INSTR_SS;
626*61046927SAndroid Build Coastguard Worker last_input_needs_ss = false;
627*61046927SAndroid Build Coastguard Worker } else if (is_load(n)) {
628*61046927SAndroid Build Coastguard Worker if (is_local_mem_load(n))
629*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss, n->dsts[0]);
630*61046927SAndroid Build Coastguard Worker else
631*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_sy, n->dsts[0]);
632*61046927SAndroid Build Coastguard Worker } else if (is_atomic(n->opc)) {
633*61046927SAndroid Build Coastguard Worker if (is_bindless_atomic(n->opc)) {
634*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_sy, n->srcs[2]);
635*61046927SAndroid Build Coastguard Worker } else if (is_global_a3xx_atomic(n->opc) ||
636*61046927SAndroid Build Coastguard Worker is_global_a6xx_atomic(n->opc)) {
637*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_sy, n->dsts[0]);
638*61046927SAndroid Build Coastguard Worker } else {
639*61046927SAndroid Build Coastguard Worker regmask_set(&state->needs_ss, n->dsts[0]);
640*61046927SAndroid Build Coastguard Worker }
641*61046927SAndroid Build Coastguard Worker } else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
642*61046927SAndroid Build Coastguard Worker state->needs_ss_for_const = true;
643*61046927SAndroid Build Coastguard Worker }
644*61046927SAndroid Build Coastguard Worker
645*61046927SAndroid Build Coastguard Worker if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
646*61046927SAndroid Build Coastguard Worker is_bindless_atomic(n->opc))
647*61046927SAndroid Build Coastguard Worker ctx->so->has_ssbo = true;
648*61046927SAndroid Build Coastguard Worker
649*61046927SAndroid Build Coastguard Worker /* both tex/sfu appear to not always immediately consume
650*61046927SAndroid Build Coastguard Worker * their src register(s):
651*61046927SAndroid Build Coastguard Worker */
652*61046927SAndroid Build Coastguard Worker if (is_war_hazard_producer(n)) {
653*61046927SAndroid Build Coastguard Worker /* These WAR hazards can always be resolved with (ss). However, when
654*61046927SAndroid Build Coastguard Worker * the reader is a sy-producer, they can also be resolved using (sy)
655*61046927SAndroid Build Coastguard Worker * because once we have synced the reader's results using (sy), its
656*61046927SAndroid Build Coastguard Worker * sources have definitely been consumed. We track the two cases
657*61046927SAndroid Build Coastguard Worker * separately so that we don't add an unnecessary (ss) if a (sy) sync
658*61046927SAndroid Build Coastguard Worker * already happened.
659*61046927SAndroid Build Coastguard Worker * For example, this prevents adding the unnecessary (ss) in the
660*61046927SAndroid Build Coastguard Worker * following sequence:
661*61046927SAndroid Build Coastguard Worker * sam rd, rs, ...
662*61046927SAndroid Build Coastguard Worker * (sy)... ; sam synced so consumed its sources
663*61046927SAndroid Build Coastguard Worker * (ss)write rs ; (ss) unnecessary since rs has been consumed already
664*61046927SAndroid Build Coastguard Worker */
665*61046927SAndroid Build Coastguard Worker bool needs_ss = is_ss_producer(n) || is_store(n) || n->opc == OPC_STC;
666*61046927SAndroid Build Coastguard Worker
667*61046927SAndroid Build Coastguard Worker if (n_is_scalar_alu) {
668*61046927SAndroid Build Coastguard Worker /* Scalar ALU also does not immediately read its source because it
669*61046927SAndroid Build Coastguard Worker * is not executed right away, but scalar ALU instructions are
670*61046927SAndroid Build Coastguard Worker * executed in-order so subsequent scalar ALU instructions don't
671*61046927SAndroid Build Coastguard Worker * need to wait for previous ones.
672*61046927SAndroid Build Coastguard Worker */
673*61046927SAndroid Build Coastguard Worker regmask_t *mask = needs_ss ? &state->needs_ss_scalar_war
674*61046927SAndroid Build Coastguard Worker : &state->needs_ss_or_sy_scalar_war;
675*61046927SAndroid Build Coastguard Worker
676*61046927SAndroid Build Coastguard Worker foreach_src (reg, n) {
677*61046927SAndroid Build Coastguard Worker if ((reg->flags & IR3_REG_SHARED) || is_reg_a0(reg)) {
678*61046927SAndroid Build Coastguard Worker regmask_set(mask, reg);
679*61046927SAndroid Build Coastguard Worker }
680*61046927SAndroid Build Coastguard Worker }
681*61046927SAndroid Build Coastguard Worker } else {
682*61046927SAndroid Build Coastguard Worker regmask_t *mask =
683*61046927SAndroid Build Coastguard Worker needs_ss ? &state->needs_ss_war : &state->needs_ss_or_sy_war;
684*61046927SAndroid Build Coastguard Worker
685*61046927SAndroid Build Coastguard Worker foreach_src (reg, n) {
686*61046927SAndroid Build Coastguard Worker if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
687*61046927SAndroid Build Coastguard Worker regmask_set(mask, reg);
688*61046927SAndroid Build Coastguard Worker }
689*61046927SAndroid Build Coastguard Worker }
690*61046927SAndroid Build Coastguard Worker }
691*61046927SAndroid Build Coastguard Worker }
692*61046927SAndroid Build Coastguard Worker
693*61046927SAndroid Build Coastguard Worker bool count = count_instruction(n, ctx->compiler);
694*61046927SAndroid Build Coastguard Worker if (count)
695*61046927SAndroid Build Coastguard Worker cycle += 1;
696*61046927SAndroid Build Coastguard Worker
697*61046927SAndroid Build Coastguard Worker delay_update(state, n, cycle, mergedregs);
698*61046927SAndroid Build Coastguard Worker
699*61046927SAndroid Build Coastguard Worker if (count)
700*61046927SAndroid Build Coastguard Worker cycle += n->repeat;
701*61046927SAndroid Build Coastguard Worker
702*61046927SAndroid Build Coastguard Worker if (ctx->early_input_release && is_input(n)) {
703*61046927SAndroid Build Coastguard Worker last_input_needs_ss |= (n->opc == OPC_LDLV);
704*61046927SAndroid Build Coastguard Worker
705*61046927SAndroid Build Coastguard Worker assert(inputs_remaining > 0);
706*61046927SAndroid Build Coastguard Worker inputs_remaining--;
707*61046927SAndroid Build Coastguard Worker if (inputs_remaining == 0) {
708*61046927SAndroid Build Coastguard Worker /* This is the last input. We add the (ei) flag to release
709*61046927SAndroid Build Coastguard Worker * varying memory after this executes. If it's an ldlv,
710*61046927SAndroid Build Coastguard Worker * however, we need to insert a dummy bary.f on which we can
711*61046927SAndroid Build Coastguard Worker * set the (ei) flag. We may also need to insert an (ss) to
712*61046927SAndroid Build Coastguard Worker * guarantee that all ldlv's have finished fetching their
713*61046927SAndroid Build Coastguard Worker * results before releasing the varying memory.
714*61046927SAndroid Build Coastguard Worker */
715*61046927SAndroid Build Coastguard Worker struct ir3_instruction *last_input = n;
716*61046927SAndroid Build Coastguard Worker if (n->opc == OPC_LDLV) {
717*61046927SAndroid Build Coastguard Worker struct ir3_instruction *baryf;
718*61046927SAndroid Build Coastguard Worker
719*61046927SAndroid Build Coastguard Worker /* (ss)bary.f (ei)r63.x, 0, r0.x */
720*61046927SAndroid Build Coastguard Worker baryf = ir3_instr_create(block, OPC_BARY_F, 1, 2);
721*61046927SAndroid Build Coastguard Worker ir3_dst_create(baryf, regid(63, 0), 0);
722*61046927SAndroid Build Coastguard Worker ir3_src_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
723*61046927SAndroid Build Coastguard Worker ir3_src_create(baryf, regid(0, 0), 0);
724*61046927SAndroid Build Coastguard Worker
725*61046927SAndroid Build Coastguard Worker last_input = baryf;
726*61046927SAndroid Build Coastguard Worker }
727*61046927SAndroid Build Coastguard Worker
728*61046927SAndroid Build Coastguard Worker last_input->dsts[0]->flags |= IR3_REG_EI;
729*61046927SAndroid Build Coastguard Worker if (last_input_needs_ss) {
730*61046927SAndroid Build Coastguard Worker apply_ss(last_input, state, mergedregs);
731*61046927SAndroid Build Coastguard Worker }
732*61046927SAndroid Build Coastguard Worker }
733*61046927SAndroid Build Coastguard Worker }
734*61046927SAndroid Build Coastguard Worker
735*61046927SAndroid Build Coastguard Worker last_n = n;
736*61046927SAndroid Build Coastguard Worker }
737*61046927SAndroid Build Coastguard Worker
738*61046927SAndroid Build Coastguard Worker assert(inputs_remaining == 0 || !ctx->early_input_release);
739*61046927SAndroid Build Coastguard Worker
740*61046927SAndroid Build Coastguard Worker if (block == ir3_after_preamble(ctx->so->ir) &&
741*61046927SAndroid Build Coastguard Worker ctx->has_tex_prefetch && !ctx->has_inputs) {
742*61046927SAndroid Build Coastguard Worker /* texture prefetch, but *no* inputs.. we need to insert a
743*61046927SAndroid Build Coastguard Worker * dummy bary.f at the top of the shader to unblock varying
744*61046927SAndroid Build Coastguard Worker * storage:
745*61046927SAndroid Build Coastguard Worker */
746*61046927SAndroid Build Coastguard Worker struct ir3_instruction *baryf;
747*61046927SAndroid Build Coastguard Worker
748*61046927SAndroid Build Coastguard Worker /* (ss)bary.f (ei)r63.x, 0, r0.x */
749*61046927SAndroid Build Coastguard Worker baryf = ir3_instr_create(block, OPC_BARY_F, 1, 2);
750*61046927SAndroid Build Coastguard Worker ir3_dst_create(baryf, regid(63, 0), 0)->flags |= IR3_REG_EI;
751*61046927SAndroid Build Coastguard Worker ir3_src_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
752*61046927SAndroid Build Coastguard Worker ir3_src_create(baryf, regid(0, 0), 0);
753*61046927SAndroid Build Coastguard Worker
754*61046927SAndroid Build Coastguard Worker /* insert the dummy bary.f at head: */
755*61046927SAndroid Build Coastguard Worker list_delinit(&baryf->node);
756*61046927SAndroid Build Coastguard Worker list_add(&baryf->node, &block->instr_list);
757*61046927SAndroid Build Coastguard Worker }
758*61046927SAndroid Build Coastguard Worker
759*61046927SAndroid Build Coastguard Worker /* Currently our nop state contains the cycle offset from the start of this
760*61046927SAndroid Build Coastguard Worker * block when each register becomes ready. But successor blocks need the
761*61046927SAndroid Build Coastguard Worker * cycle offset from their start, which is this block's end. Translate the
762*61046927SAndroid Build Coastguard Worker * cycle offset.
763*61046927SAndroid Build Coastguard Worker */
764*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++)
765*61046927SAndroid Build Coastguard Worker state->pred_ready[i] = MAX2(state->pred_ready[i], cycle) - cycle;
766*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) {
767*61046927SAndroid Build Coastguard Worker state->alu_nop.full_ready[i] =
768*61046927SAndroid Build Coastguard Worker MAX2(state->alu_nop.full_ready[i], cycle) - cycle;
769*61046927SAndroid Build Coastguard Worker state->alu_nop.half_ready[i] =
770*61046927SAndroid Build Coastguard Worker MAX2(state->alu_nop.half_ready[i], cycle) - cycle;
771*61046927SAndroid Build Coastguard Worker state->non_alu_nop.full_ready[i] =
772*61046927SAndroid Build Coastguard Worker MAX2(state->non_alu_nop.full_ready[i], cycle) - cycle;
773*61046927SAndroid Build Coastguard Worker state->non_alu_nop.half_ready[i] =
774*61046927SAndroid Build Coastguard Worker MAX2(state->non_alu_nop.half_ready[i], cycle) - cycle;
775*61046927SAndroid Build Coastguard Worker }
776*61046927SAndroid Build Coastguard Worker
777*61046927SAndroid Build Coastguard Worker bd->valid = true;
778*61046927SAndroid Build Coastguard Worker
779*61046927SAndroid Build Coastguard Worker if (memcmp(&prev_state, state, sizeof(*state))) {
780*61046927SAndroid Build Coastguard Worker /* our output state changed, this invalidates all of our
781*61046927SAndroid Build Coastguard Worker * successors:
782*61046927SAndroid Build Coastguard Worker */
783*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
784*61046927SAndroid Build Coastguard Worker if (!block->successors[i])
785*61046927SAndroid Build Coastguard Worker break;
786*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data *pbd = block->successors[i]->data;
787*61046927SAndroid Build Coastguard Worker pbd->valid = false;
788*61046927SAndroid Build Coastguard Worker }
789*61046927SAndroid Build Coastguard Worker }
790*61046927SAndroid Build Coastguard Worker
791*61046927SAndroid Build Coastguard Worker return true;
792*61046927SAndroid Build Coastguard Worker }
793*61046927SAndroid Build Coastguard Worker
794*61046927SAndroid Build Coastguard Worker /* Expands dsxpp and dsypp macros to:
795*61046927SAndroid Build Coastguard Worker *
796*61046927SAndroid Build Coastguard Worker * dsxpp.1 dst, src
797*61046927SAndroid Build Coastguard Worker * dsxpp.1.p dst, src
798*61046927SAndroid Build Coastguard Worker *
799*61046927SAndroid Build Coastguard Worker * We apply this after flags syncing, as we don't want to sync in between the
800*61046927SAndroid Build Coastguard Worker * two (which might happen if dst == src).
801*61046927SAndroid Build Coastguard Worker */
802*61046927SAndroid Build Coastguard Worker static bool
apply_fine_deriv_macro(struct ir3_legalize_ctx * ctx,struct ir3_block * block)803*61046927SAndroid Build Coastguard Worker apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
804*61046927SAndroid Build Coastguard Worker {
805*61046927SAndroid Build Coastguard Worker struct list_head instr_list;
806*61046927SAndroid Build Coastguard Worker
807*61046927SAndroid Build Coastguard Worker /* remove all the instructions from the list, we'll be adding
808*61046927SAndroid Build Coastguard Worker * them back in as we go
809*61046927SAndroid Build Coastguard Worker */
810*61046927SAndroid Build Coastguard Worker list_replace(&block->instr_list, &instr_list);
811*61046927SAndroid Build Coastguard Worker list_inithead(&block->instr_list);
812*61046927SAndroid Build Coastguard Worker
813*61046927SAndroid Build Coastguard Worker foreach_instr_safe (n, &instr_list) {
814*61046927SAndroid Build Coastguard Worker list_addtail(&n->node, &block->instr_list);
815*61046927SAndroid Build Coastguard Worker
816*61046927SAndroid Build Coastguard Worker if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) {
817*61046927SAndroid Build Coastguard Worker n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1;
818*61046927SAndroid Build Coastguard Worker
819*61046927SAndroid Build Coastguard Worker struct ir3_instruction *op_p = ir3_instr_clone(n);
820*61046927SAndroid Build Coastguard Worker op_p->flags = IR3_INSTR_P;
821*61046927SAndroid Build Coastguard Worker
822*61046927SAndroid Build Coastguard Worker ctx->so->need_full_quad = true;
823*61046927SAndroid Build Coastguard Worker }
824*61046927SAndroid Build Coastguard Worker }
825*61046927SAndroid Build Coastguard Worker
826*61046927SAndroid Build Coastguard Worker return true;
827*61046927SAndroid Build Coastguard Worker }
828*61046927SAndroid Build Coastguard Worker
829*61046927SAndroid Build Coastguard Worker /* Some instructions can take a dummy destination of r63.x, which we model as it
830*61046927SAndroid Build Coastguard Worker * not having a destination in the IR to avoid having special code to handle
831*61046927SAndroid Build Coastguard Worker * this. Insert the dummy destination after everything else is done.
832*61046927SAndroid Build Coastguard Worker */
833*61046927SAndroid Build Coastguard Worker static bool
expand_dummy_dests(struct ir3_block * block)834*61046927SAndroid Build Coastguard Worker expand_dummy_dests(struct ir3_block *block)
835*61046927SAndroid Build Coastguard Worker {
836*61046927SAndroid Build Coastguard Worker foreach_instr (n, &block->instr_list) {
837*61046927SAndroid Build Coastguard Worker if ((n->opc == OPC_SAM || n->opc == OPC_LDC || n->opc == OPC_RESINFO) &&
838*61046927SAndroid Build Coastguard Worker n->dsts_count == 0) {
839*61046927SAndroid Build Coastguard Worker struct ir3_register *dst = ir3_dst_create(n, INVALID_REG, 0);
840*61046927SAndroid Build Coastguard Worker /* Copy the blob's writemask */
841*61046927SAndroid Build Coastguard Worker if (n->opc == OPC_SAM)
842*61046927SAndroid Build Coastguard Worker dst->wrmask = 0b1111;
843*61046927SAndroid Build Coastguard Worker }
844*61046927SAndroid Build Coastguard Worker }
845*61046927SAndroid Build Coastguard Worker return true;
846*61046927SAndroid Build Coastguard Worker }
847*61046927SAndroid Build Coastguard Worker
848*61046927SAndroid Build Coastguard Worker static void
apply_push_consts_load_macro(struct ir3_legalize_ctx * ctx,struct ir3_block * block)849*61046927SAndroid Build Coastguard Worker apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
850*61046927SAndroid Build Coastguard Worker struct ir3_block *block)
851*61046927SAndroid Build Coastguard Worker {
852*61046927SAndroid Build Coastguard Worker foreach_instr (n, &block->instr_list) {
853*61046927SAndroid Build Coastguard Worker if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
854*61046927SAndroid Build Coastguard Worker struct ir3_instruction *stsc = ir3_instr_create(block, OPC_STSC, 0, 2);
855*61046927SAndroid Build Coastguard Worker ir3_instr_move_after(stsc, n);
856*61046927SAndroid Build Coastguard Worker ir3_src_create(stsc, 0, IR3_REG_IMMED)->iim_val =
857*61046927SAndroid Build Coastguard Worker n->push_consts.dst_base;
858*61046927SAndroid Build Coastguard Worker ir3_src_create(stsc, 0, IR3_REG_IMMED)->iim_val =
859*61046927SAndroid Build Coastguard Worker n->push_consts.src_base;
860*61046927SAndroid Build Coastguard Worker stsc->cat6.iim_val = n->push_consts.src_size;
861*61046927SAndroid Build Coastguard Worker stsc->cat6.type = TYPE_U32;
862*61046927SAndroid Build Coastguard Worker
863*61046927SAndroid Build Coastguard Worker if (ctx->compiler->stsc_duplication_quirk) {
864*61046927SAndroid Build Coastguard Worker struct ir3_instruction *nop = ir3_NOP(block);
865*61046927SAndroid Build Coastguard Worker ir3_instr_move_after(nop, stsc);
866*61046927SAndroid Build Coastguard Worker nop->flags |= IR3_INSTR_SS;
867*61046927SAndroid Build Coastguard Worker ir3_instr_move_after(ir3_instr_clone(stsc), nop);
868*61046927SAndroid Build Coastguard Worker }
869*61046927SAndroid Build Coastguard Worker
870*61046927SAndroid Build Coastguard Worker list_delinit(&n->node);
871*61046927SAndroid Build Coastguard Worker break;
872*61046927SAndroid Build Coastguard Worker } else if (!is_meta(n)) {
873*61046927SAndroid Build Coastguard Worker break;
874*61046927SAndroid Build Coastguard Worker }
875*61046927SAndroid Build Coastguard Worker }
876*61046927SAndroid Build Coastguard Worker }
877*61046927SAndroid Build Coastguard Worker
878*61046927SAndroid Build Coastguard Worker /* NOTE: branch instructions are always the last instruction(s)
879*61046927SAndroid Build Coastguard Worker * in the block. We take advantage of this as we resolve the
880*61046927SAndroid Build Coastguard Worker * branches, since "if (foo) break;" constructs turn into
881*61046927SAndroid Build Coastguard Worker * something like:
882*61046927SAndroid Build Coastguard Worker *
883*61046927SAndroid Build Coastguard Worker * block3 {
884*61046927SAndroid Build Coastguard Worker * ...
885*61046927SAndroid Build Coastguard Worker * 0029:021: mov.s32s32 r62.x, r1.y
886*61046927SAndroid Build Coastguard Worker * 0082:022: br !p0.x, target=block5
887*61046927SAndroid Build Coastguard Worker * 0083:023: br p0.x, target=block4
888*61046927SAndroid Build Coastguard Worker * // succs: if _[0029:021: mov.s32s32] block4; else block5;
889*61046927SAndroid Build Coastguard Worker * }
890*61046927SAndroid Build Coastguard Worker * block4 {
891*61046927SAndroid Build Coastguard Worker * 0084:024: jump, target=block6
892*61046927SAndroid Build Coastguard Worker * // succs: block6;
893*61046927SAndroid Build Coastguard Worker * }
894*61046927SAndroid Build Coastguard Worker * block5 {
895*61046927SAndroid Build Coastguard Worker * 0085:025: jump, target=block7
896*61046927SAndroid Build Coastguard Worker * // succs: block7;
897*61046927SAndroid Build Coastguard Worker * }
898*61046927SAndroid Build Coastguard Worker *
899*61046927SAndroid Build Coastguard Worker * ie. only instruction in block4/block5 is a jump, so when
900*61046927SAndroid Build Coastguard Worker * resolving branches we can easily detect this by checking
901*61046927SAndroid Build Coastguard Worker * that the first instruction in the target block is itself
902*61046927SAndroid Build Coastguard Worker * a jump, and setup the br directly to the jump's target
903*61046927SAndroid Build Coastguard Worker * (and strip back out the now unreached jump)
904*61046927SAndroid Build Coastguard Worker *
905*61046927SAndroid Build Coastguard Worker * TODO sometimes we end up with things like:
906*61046927SAndroid Build Coastguard Worker *
907*61046927SAndroid Build Coastguard Worker * br !p0.x, #2
908*61046927SAndroid Build Coastguard Worker * br p0.x, #12
909*61046927SAndroid Build Coastguard Worker * add.u r0.y, r0.y, 1
910*61046927SAndroid Build Coastguard Worker *
911*61046927SAndroid Build Coastguard Worker * If we swapped the order of the branches, we could drop one.
912*61046927SAndroid Build Coastguard Worker */
913*61046927SAndroid Build Coastguard Worker static struct ir3_block *
resolve_dest_block(struct ir3_block * block)914*61046927SAndroid Build Coastguard Worker resolve_dest_block(struct ir3_block *block)
915*61046927SAndroid Build Coastguard Worker {
916*61046927SAndroid Build Coastguard Worker /* special case for last block: */
917*61046927SAndroid Build Coastguard Worker if (!block->successors[0])
918*61046927SAndroid Build Coastguard Worker return block;
919*61046927SAndroid Build Coastguard Worker
920*61046927SAndroid Build Coastguard Worker /* NOTE that we may or may not have inserted the jump
921*61046927SAndroid Build Coastguard Worker * in the target block yet, so conditions to resolve
922*61046927SAndroid Build Coastguard Worker * the dest to the dest block's successor are:
923*61046927SAndroid Build Coastguard Worker *
924*61046927SAndroid Build Coastguard Worker * (1) successor[1] == NULL &&
925*61046927SAndroid Build Coastguard Worker * (2) (block-is-empty || only-instr-is-jump)
926*61046927SAndroid Build Coastguard Worker */
927*61046927SAndroid Build Coastguard Worker if (block->successors[1] == NULL) {
928*61046927SAndroid Build Coastguard Worker if (list_is_empty(&block->instr_list)) {
929*61046927SAndroid Build Coastguard Worker return block->successors[0];
930*61046927SAndroid Build Coastguard Worker } else if (list_length(&block->instr_list) == 1) {
931*61046927SAndroid Build Coastguard Worker struct ir3_instruction *instr =
932*61046927SAndroid Build Coastguard Worker list_first_entry(&block->instr_list, struct ir3_instruction, node);
933*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_JUMP) {
934*61046927SAndroid Build Coastguard Worker /* If this jump is backwards, then we will probably convert
935*61046927SAndroid Build Coastguard Worker * the jump being resolved to a backwards jump, which will
936*61046927SAndroid Build Coastguard Worker * change a loop-with-continue or loop-with-if into a
937*61046927SAndroid Build Coastguard Worker * doubly-nested loop and change the convergence behavior.
938*61046927SAndroid Build Coastguard Worker * Disallow this here.
939*61046927SAndroid Build Coastguard Worker */
940*61046927SAndroid Build Coastguard Worker if (block->successors[0]->index <= block->index)
941*61046927SAndroid Build Coastguard Worker return block;
942*61046927SAndroid Build Coastguard Worker return block->successors[0];
943*61046927SAndroid Build Coastguard Worker }
944*61046927SAndroid Build Coastguard Worker }
945*61046927SAndroid Build Coastguard Worker }
946*61046927SAndroid Build Coastguard Worker return block;
947*61046927SAndroid Build Coastguard Worker }
948*61046927SAndroid Build Coastguard Worker
949*61046927SAndroid Build Coastguard Worker static void
remove_unused_block(struct ir3_block * old_target)950*61046927SAndroid Build Coastguard Worker remove_unused_block(struct ir3_block *old_target)
951*61046927SAndroid Build Coastguard Worker {
952*61046927SAndroid Build Coastguard Worker list_delinit(&old_target->node);
953*61046927SAndroid Build Coastguard Worker
954*61046927SAndroid Build Coastguard Worker /* cleanup dangling predecessors: */
955*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(old_target->successors); i++) {
956*61046927SAndroid Build Coastguard Worker if (old_target->successors[i]) {
957*61046927SAndroid Build Coastguard Worker struct ir3_block *succ = old_target->successors[i];
958*61046927SAndroid Build Coastguard Worker ir3_block_remove_predecessor(succ, old_target);
959*61046927SAndroid Build Coastguard Worker }
960*61046927SAndroid Build Coastguard Worker }
961*61046927SAndroid Build Coastguard Worker }
962*61046927SAndroid Build Coastguard Worker
963*61046927SAndroid Build Coastguard Worker static bool
retarget_jump(struct ir3_instruction * instr,struct ir3_block * new_target)964*61046927SAndroid Build Coastguard Worker retarget_jump(struct ir3_instruction *instr, struct ir3_block *new_target)
965*61046927SAndroid Build Coastguard Worker {
966*61046927SAndroid Build Coastguard Worker struct ir3_block *old_target = instr->cat0.target;
967*61046927SAndroid Build Coastguard Worker struct ir3_block *cur_block = instr->block;
968*61046927SAndroid Build Coastguard Worker
969*61046927SAndroid Build Coastguard Worker /* update current blocks successors to reflect the retargetting: */
970*61046927SAndroid Build Coastguard Worker if (cur_block->successors[0] == old_target) {
971*61046927SAndroid Build Coastguard Worker cur_block->successors[0] = new_target;
972*61046927SAndroid Build Coastguard Worker } else {
973*61046927SAndroid Build Coastguard Worker assert(cur_block->successors[1] == old_target);
974*61046927SAndroid Build Coastguard Worker cur_block->successors[1] = new_target;
975*61046927SAndroid Build Coastguard Worker }
976*61046927SAndroid Build Coastguard Worker
977*61046927SAndroid Build Coastguard Worker /* update new target's predecessors: */
978*61046927SAndroid Build Coastguard Worker ir3_block_add_predecessor(new_target, cur_block);
979*61046927SAndroid Build Coastguard Worker
980*61046927SAndroid Build Coastguard Worker /* and remove old_target's predecessor: */
981*61046927SAndroid Build Coastguard Worker ir3_block_remove_predecessor(old_target, cur_block);
982*61046927SAndroid Build Coastguard Worker
983*61046927SAndroid Build Coastguard Worker instr->cat0.target = new_target;
984*61046927SAndroid Build Coastguard Worker
985*61046927SAndroid Build Coastguard Worker if (old_target->predecessors_count == 0) {
986*61046927SAndroid Build Coastguard Worker remove_unused_block(old_target);
987*61046927SAndroid Build Coastguard Worker return true;
988*61046927SAndroid Build Coastguard Worker }
989*61046927SAndroid Build Coastguard Worker
990*61046927SAndroid Build Coastguard Worker return false;
991*61046927SAndroid Build Coastguard Worker }
992*61046927SAndroid Build Coastguard Worker
993*61046927SAndroid Build Coastguard Worker static bool
is_invertible_branch(struct ir3_instruction * instr)994*61046927SAndroid Build Coastguard Worker is_invertible_branch(struct ir3_instruction *instr)
995*61046927SAndroid Build Coastguard Worker {
996*61046927SAndroid Build Coastguard Worker switch (instr->opc) {
997*61046927SAndroid Build Coastguard Worker case OPC_BR:
998*61046927SAndroid Build Coastguard Worker case OPC_BRAA:
999*61046927SAndroid Build Coastguard Worker case OPC_BRAO:
1000*61046927SAndroid Build Coastguard Worker case OPC_BANY:
1001*61046927SAndroid Build Coastguard Worker case OPC_BALL:
1002*61046927SAndroid Build Coastguard Worker return true;
1003*61046927SAndroid Build Coastguard Worker default:
1004*61046927SAndroid Build Coastguard Worker return false;
1005*61046927SAndroid Build Coastguard Worker }
1006*61046927SAndroid Build Coastguard Worker }
1007*61046927SAndroid Build Coastguard Worker
1008*61046927SAndroid Build Coastguard Worker static bool
opt_jump(struct ir3 * ir)1009*61046927SAndroid Build Coastguard Worker opt_jump(struct ir3 *ir)
1010*61046927SAndroid Build Coastguard Worker {
1011*61046927SAndroid Build Coastguard Worker bool progress = false;
1012*61046927SAndroid Build Coastguard Worker
1013*61046927SAndroid Build Coastguard Worker unsigned index = 0;
1014*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list)
1015*61046927SAndroid Build Coastguard Worker block->index = index++;
1016*61046927SAndroid Build Coastguard Worker
1017*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1018*61046927SAndroid Build Coastguard Worker /* This pass destroys the physical CFG so don't keep it around to avoid
1019*61046927SAndroid Build Coastguard Worker * validation errors.
1020*61046927SAndroid Build Coastguard Worker */
1021*61046927SAndroid Build Coastguard Worker block->physical_successors_count = 0;
1022*61046927SAndroid Build Coastguard Worker block->physical_predecessors_count = 0;
1023*61046927SAndroid Build Coastguard Worker
1024*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
1025*61046927SAndroid Build Coastguard Worker if (!is_flow(instr) || !instr->cat0.target)
1026*61046927SAndroid Build Coastguard Worker continue;
1027*61046927SAndroid Build Coastguard Worker
1028*61046927SAndroid Build Coastguard Worker struct ir3_block *tblock = resolve_dest_block(instr->cat0.target);
1029*61046927SAndroid Build Coastguard Worker if (tblock != instr->cat0.target) {
1030*61046927SAndroid Build Coastguard Worker progress = true;
1031*61046927SAndroid Build Coastguard Worker
1032*61046927SAndroid Build Coastguard Worker /* Exit early if we deleted a block to avoid iterator
1033*61046927SAndroid Build Coastguard Worker * weirdness/assert fails
1034*61046927SAndroid Build Coastguard Worker */
1035*61046927SAndroid Build Coastguard Worker if (retarget_jump(instr, tblock))
1036*61046927SAndroid Build Coastguard Worker return true;
1037*61046927SAndroid Build Coastguard Worker }
1038*61046927SAndroid Build Coastguard Worker }
1039*61046927SAndroid Build Coastguard Worker
1040*61046927SAndroid Build Coastguard Worker /* Detect the case where the block ends either with:
1041*61046927SAndroid Build Coastguard Worker * - A single unconditional jump to the next block.
1042*61046927SAndroid Build Coastguard Worker * - Two jump instructions with opposite conditions, and one of the
1043*61046927SAndroid Build Coastguard Worker * them jumps to the next block.
1044*61046927SAndroid Build Coastguard Worker * We can remove the one that jumps to the next block in either case.
1045*61046927SAndroid Build Coastguard Worker */
1046*61046927SAndroid Build Coastguard Worker if (list_is_empty(&block->instr_list))
1047*61046927SAndroid Build Coastguard Worker continue;
1048*61046927SAndroid Build Coastguard Worker
1049*61046927SAndroid Build Coastguard Worker struct ir3_instruction *jumps[2] = {NULL, NULL};
1050*61046927SAndroid Build Coastguard Worker jumps[0] =
1051*61046927SAndroid Build Coastguard Worker list_last_entry(&block->instr_list, struct ir3_instruction, node);
1052*61046927SAndroid Build Coastguard Worker if (!list_is_singular(&block->instr_list))
1053*61046927SAndroid Build Coastguard Worker jumps[1] =
1054*61046927SAndroid Build Coastguard Worker list_last_entry(&jumps[0]->node, struct ir3_instruction, node);
1055*61046927SAndroid Build Coastguard Worker
1056*61046927SAndroid Build Coastguard Worker if (jumps[0]->opc == OPC_JUMP)
1057*61046927SAndroid Build Coastguard Worker jumps[1] = NULL;
1058*61046927SAndroid Build Coastguard Worker else if (!is_invertible_branch(jumps[0]) || !jumps[1] ||
1059*61046927SAndroid Build Coastguard Worker !is_invertible_branch(jumps[1])) {
1060*61046927SAndroid Build Coastguard Worker continue;
1061*61046927SAndroid Build Coastguard Worker }
1062*61046927SAndroid Build Coastguard Worker
1063*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < 2; i++) {
1064*61046927SAndroid Build Coastguard Worker if (!jumps[i])
1065*61046927SAndroid Build Coastguard Worker continue;
1066*61046927SAndroid Build Coastguard Worker struct ir3_block *tblock = jumps[i]->cat0.target;
1067*61046927SAndroid Build Coastguard Worker if (&tblock->node == block->node.next) {
1068*61046927SAndroid Build Coastguard Worker list_delinit(&jumps[i]->node);
1069*61046927SAndroid Build Coastguard Worker progress = true;
1070*61046927SAndroid Build Coastguard Worker break;
1071*61046927SAndroid Build Coastguard Worker }
1072*61046927SAndroid Build Coastguard Worker }
1073*61046927SAndroid Build Coastguard Worker }
1074*61046927SAndroid Build Coastguard Worker
1075*61046927SAndroid Build Coastguard Worker return progress;
1076*61046927SAndroid Build Coastguard Worker }
1077*61046927SAndroid Build Coastguard Worker
1078*61046927SAndroid Build Coastguard Worker static void
resolve_jumps(struct ir3 * ir)1079*61046927SAndroid Build Coastguard Worker resolve_jumps(struct ir3 *ir)
1080*61046927SAndroid Build Coastguard Worker {
1081*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list)
1082*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list)
1083*61046927SAndroid Build Coastguard Worker if (is_flow(instr) && instr->cat0.target) {
1084*61046927SAndroid Build Coastguard Worker struct ir3_instruction *target = list_first_entry(
1085*61046927SAndroid Build Coastguard Worker &instr->cat0.target->instr_list, struct ir3_instruction, node);
1086*61046927SAndroid Build Coastguard Worker
1087*61046927SAndroid Build Coastguard Worker instr->cat0.immed = (int)target->ip - (int)instr->ip;
1088*61046927SAndroid Build Coastguard Worker }
1089*61046927SAndroid Build Coastguard Worker }
1090*61046927SAndroid Build Coastguard Worker
1091*61046927SAndroid Build Coastguard Worker static void
mark_jp(struct ir3_block * block)1092*61046927SAndroid Build Coastguard Worker mark_jp(struct ir3_block *block)
1093*61046927SAndroid Build Coastguard Worker {
1094*61046927SAndroid Build Coastguard Worker /* We only call this on the end block (in kill_sched) or after retargeting
1095*61046927SAndroid Build Coastguard Worker * all jumps to empty blocks (in mark_xvergence_points) so there's no need to
1096*61046927SAndroid Build Coastguard Worker * worry about empty blocks.
1097*61046927SAndroid Build Coastguard Worker */
1098*61046927SAndroid Build Coastguard Worker assert(!list_is_empty(&block->instr_list));
1099*61046927SAndroid Build Coastguard Worker
1100*61046927SAndroid Build Coastguard Worker struct ir3_instruction *target =
1101*61046927SAndroid Build Coastguard Worker list_first_entry(&block->instr_list, struct ir3_instruction, node);
1102*61046927SAndroid Build Coastguard Worker target->flags |= IR3_INSTR_JP;
1103*61046927SAndroid Build Coastguard Worker }
1104*61046927SAndroid Build Coastguard Worker
1105*61046927SAndroid Build Coastguard Worker /* Mark points where control flow reconverges.
1106*61046927SAndroid Build Coastguard Worker *
1107*61046927SAndroid Build Coastguard Worker * Re-convergence points are where "parked" threads are reconverged with threads
1108*61046927SAndroid Build Coastguard Worker * that took the opposite path last time around. We already calculated them, we
1109*61046927SAndroid Build Coastguard Worker * just need to mark them with (jp).
1110*61046927SAndroid Build Coastguard Worker */
1111*61046927SAndroid Build Coastguard Worker static void
mark_xvergence_points(struct ir3 * ir)1112*61046927SAndroid Build Coastguard Worker mark_xvergence_points(struct ir3 *ir)
1113*61046927SAndroid Build Coastguard Worker {
1114*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1115*61046927SAndroid Build Coastguard Worker if (block->reconvergence_point)
1116*61046927SAndroid Build Coastguard Worker mark_jp(block);
1117*61046927SAndroid Build Coastguard Worker }
1118*61046927SAndroid Build Coastguard Worker }
1119*61046927SAndroid Build Coastguard Worker
1120*61046927SAndroid Build Coastguard Worker static void
invert_branch(struct ir3_instruction * branch)1121*61046927SAndroid Build Coastguard Worker invert_branch(struct ir3_instruction *branch)
1122*61046927SAndroid Build Coastguard Worker {
1123*61046927SAndroid Build Coastguard Worker switch (branch->opc) {
1124*61046927SAndroid Build Coastguard Worker case OPC_BR:
1125*61046927SAndroid Build Coastguard Worker break;
1126*61046927SAndroid Build Coastguard Worker case OPC_BALL:
1127*61046927SAndroid Build Coastguard Worker branch->opc = OPC_BANY;
1128*61046927SAndroid Build Coastguard Worker break;
1129*61046927SAndroid Build Coastguard Worker case OPC_BANY:
1130*61046927SAndroid Build Coastguard Worker branch->opc = OPC_BALL;
1131*61046927SAndroid Build Coastguard Worker break;
1132*61046927SAndroid Build Coastguard Worker case OPC_BRAA:
1133*61046927SAndroid Build Coastguard Worker branch->opc = OPC_BRAO;
1134*61046927SAndroid Build Coastguard Worker break;
1135*61046927SAndroid Build Coastguard Worker case OPC_BRAO:
1136*61046927SAndroid Build Coastguard Worker branch->opc = OPC_BRAA;
1137*61046927SAndroid Build Coastguard Worker break;
1138*61046927SAndroid Build Coastguard Worker default:
1139*61046927SAndroid Build Coastguard Worker unreachable("can't get here");
1140*61046927SAndroid Build Coastguard Worker }
1141*61046927SAndroid Build Coastguard Worker
1142*61046927SAndroid Build Coastguard Worker branch->cat0.inv1 = !branch->cat0.inv1;
1143*61046927SAndroid Build Coastguard Worker branch->cat0.inv2 = !branch->cat0.inv2;
1144*61046927SAndroid Build Coastguard Worker branch->cat0.target = branch->block->successors[1];
1145*61046927SAndroid Build Coastguard Worker }
1146*61046927SAndroid Build Coastguard Worker
1147*61046927SAndroid Build Coastguard Worker /* Insert the branch/jump instructions for flow control between blocks.
1148*61046927SAndroid Build Coastguard Worker * Initially this is done naively, without considering if the successor
1149*61046927SAndroid Build Coastguard Worker * block immediately follows the current block (ie. so no jump required),
1150*61046927SAndroid Build Coastguard Worker * but that is cleaned up in opt_jump().
1151*61046927SAndroid Build Coastguard Worker */
1152*61046927SAndroid Build Coastguard Worker static void
block_sched(struct ir3 * ir)1153*61046927SAndroid Build Coastguard Worker block_sched(struct ir3 *ir)
1154*61046927SAndroid Build Coastguard Worker {
1155*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1156*61046927SAndroid Build Coastguard Worker struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1157*61046927SAndroid Build Coastguard Worker
1158*61046927SAndroid Build Coastguard Worker if (block->successors[1]) {
1159*61046927SAndroid Build Coastguard Worker /* if/else, conditional branches to "then" or "else": */
1160*61046927SAndroid Build Coastguard Worker struct ir3_instruction *br1, *br2;
1161*61046927SAndroid Build Coastguard Worker
1162*61046927SAndroid Build Coastguard Worker assert(terminator);
1163*61046927SAndroid Build Coastguard Worker unsigned opc = terminator->opc;
1164*61046927SAndroid Build Coastguard Worker
1165*61046927SAndroid Build Coastguard Worker if (opc == OPC_GETONE || opc == OPC_SHPS || opc == OPC_GETLAST) {
1166*61046927SAndroid Build Coastguard Worker /* getone/shps can't be inverted, and it wouldn't even make sense
1167*61046927SAndroid Build Coastguard Worker * to follow it with an inverted branch, so follow it by an
1168*61046927SAndroid Build Coastguard Worker * unconditional branch.
1169*61046927SAndroid Build Coastguard Worker */
1170*61046927SAndroid Build Coastguard Worker assert(terminator->srcs_count == 0);
1171*61046927SAndroid Build Coastguard Worker br1 = terminator;
1172*61046927SAndroid Build Coastguard Worker br1->cat0.target = block->successors[1];
1173*61046927SAndroid Build Coastguard Worker
1174*61046927SAndroid Build Coastguard Worker br2 = ir3_JUMP(block);
1175*61046927SAndroid Build Coastguard Worker br2->cat0.target = block->successors[0];
1176*61046927SAndroid Build Coastguard Worker } else if (opc == OPC_BR || opc == OPC_BRAA || opc == OPC_BRAO ||
1177*61046927SAndroid Build Coastguard Worker opc == OPC_BALL || opc == OPC_BANY) {
1178*61046927SAndroid Build Coastguard Worker /* create "else" branch first (since "then" block should
1179*61046927SAndroid Build Coastguard Worker * frequently/always end up being a fall-thru):
1180*61046927SAndroid Build Coastguard Worker */
1181*61046927SAndroid Build Coastguard Worker br1 = terminator;
1182*61046927SAndroid Build Coastguard Worker br2 = ir3_instr_clone(br1);
1183*61046927SAndroid Build Coastguard Worker invert_branch(br1);
1184*61046927SAndroid Build Coastguard Worker br2->cat0.target = block->successors[0];
1185*61046927SAndroid Build Coastguard Worker } else {
1186*61046927SAndroid Build Coastguard Worker assert(opc == OPC_PREDT || opc == OPC_PREDF);
1187*61046927SAndroid Build Coastguard Worker
1188*61046927SAndroid Build Coastguard Worker /* Handled by prede_sched. */
1189*61046927SAndroid Build Coastguard Worker terminator->cat0.target = block->successors[0];
1190*61046927SAndroid Build Coastguard Worker continue;
1191*61046927SAndroid Build Coastguard Worker }
1192*61046927SAndroid Build Coastguard Worker
1193*61046927SAndroid Build Coastguard Worker /* Creating br2 caused it to be moved before the terminator b1, move it
1194*61046927SAndroid Build Coastguard Worker * back.
1195*61046927SAndroid Build Coastguard Worker */
1196*61046927SAndroid Build Coastguard Worker ir3_instr_move_after(br2, br1);
1197*61046927SAndroid Build Coastguard Worker } else if (block->successors[0]) {
1198*61046927SAndroid Build Coastguard Worker /* otherwise unconditional jump or predt/predf to next block which
1199*61046927SAndroid Build Coastguard Worker * should already have been inserted.
1200*61046927SAndroid Build Coastguard Worker */
1201*61046927SAndroid Build Coastguard Worker assert(terminator);
1202*61046927SAndroid Build Coastguard Worker assert(terminator->opc == OPC_JUMP || terminator->opc == OPC_PREDT ||
1203*61046927SAndroid Build Coastguard Worker terminator->opc == OPC_PREDF);
1204*61046927SAndroid Build Coastguard Worker terminator->cat0.target = block->successors[0];
1205*61046927SAndroid Build Coastguard Worker }
1206*61046927SAndroid Build Coastguard Worker }
1207*61046927SAndroid Build Coastguard Worker }
1208*61046927SAndroid Build Coastguard Worker
1209*61046927SAndroid Build Coastguard Worker static void
prede_sched(struct ir3 * ir)1210*61046927SAndroid Build Coastguard Worker prede_sched(struct ir3 *ir)
1211*61046927SAndroid Build Coastguard Worker {
1212*61046927SAndroid Build Coastguard Worker unsigned index = 0;
1213*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list)
1214*61046927SAndroid Build Coastguard Worker block->index = index++;
1215*61046927SAndroid Build Coastguard Worker
1216*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1217*61046927SAndroid Build Coastguard Worker /* Look for the following pattern generated by NIR lowering. The numbers
1218*61046927SAndroid Build Coastguard Worker * at the top of blocks are their index.
1219*61046927SAndroid Build Coastguard Worker * |--- i ----|
1220*61046927SAndroid Build Coastguard Worker * | ... |
1221*61046927SAndroid Build Coastguard Worker * | pred[tf] |
1222*61046927SAndroid Build Coastguard Worker * |----------|
1223*61046927SAndroid Build Coastguard Worker * succ0 / \ succ1
1224*61046927SAndroid Build Coastguard Worker * |-- i+1 ---| |-- i+2 ---|
1225*61046927SAndroid Build Coastguard Worker * | ... | | ... |
1226*61046927SAndroid Build Coastguard Worker * | pred[ft] | | ... |
1227*61046927SAndroid Build Coastguard Worker * |----------| |----------|
1228*61046927SAndroid Build Coastguard Worker * succ0 \ / succ0
1229*61046927SAndroid Build Coastguard Worker * |--- j ----|
1230*61046927SAndroid Build Coastguard Worker * | ... |
1231*61046927SAndroid Build Coastguard Worker * |----------|
1232*61046927SAndroid Build Coastguard Worker */
1233*61046927SAndroid Build Coastguard Worker struct ir3_block *succ0 = block->successors[0];
1234*61046927SAndroid Build Coastguard Worker struct ir3_block *succ1 = block->successors[1];
1235*61046927SAndroid Build Coastguard Worker
1236*61046927SAndroid Build Coastguard Worker if (!succ1)
1237*61046927SAndroid Build Coastguard Worker continue;
1238*61046927SAndroid Build Coastguard Worker
1239*61046927SAndroid Build Coastguard Worker struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1240*61046927SAndroid Build Coastguard Worker if (!terminator)
1241*61046927SAndroid Build Coastguard Worker continue;
1242*61046927SAndroid Build Coastguard Worker if (terminator->opc != OPC_PREDT && terminator->opc != OPC_PREDF)
1243*61046927SAndroid Build Coastguard Worker continue;
1244*61046927SAndroid Build Coastguard Worker
1245*61046927SAndroid Build Coastguard Worker assert(!succ0->successors[1] && !succ1->successors[1]);
1246*61046927SAndroid Build Coastguard Worker assert(succ0->successors[0] == succ1->successors[0]);
1247*61046927SAndroid Build Coastguard Worker assert(succ0->predecessors_count == 1 && succ1->predecessors_count == 1);
1248*61046927SAndroid Build Coastguard Worker assert(succ0->index == (block->index + 1));
1249*61046927SAndroid Build Coastguard Worker assert(succ1->index == (block->index + 2));
1250*61046927SAndroid Build Coastguard Worker
1251*61046927SAndroid Build Coastguard Worker struct ir3_instruction *succ0_terminator =
1252*61046927SAndroid Build Coastguard Worker ir3_block_get_terminator(succ0);
1253*61046927SAndroid Build Coastguard Worker assert(succ0_terminator);
1254*61046927SAndroid Build Coastguard Worker assert(succ0_terminator->opc ==
1255*61046927SAndroid Build Coastguard Worker (terminator->opc == OPC_PREDT ? OPC_PREDF : OPC_PREDT));
1256*61046927SAndroid Build Coastguard Worker
1257*61046927SAndroid Build Coastguard Worker ASSERTED struct ir3_instruction *succ1_terminator =
1258*61046927SAndroid Build Coastguard Worker ir3_block_get_terminator(succ1);
1259*61046927SAndroid Build Coastguard Worker assert(!succ1_terminator || (succ1_terminator->opc == OPC_JUMP));
1260*61046927SAndroid Build Coastguard Worker
1261*61046927SAndroid Build Coastguard Worker /* Simple case: both successors contain instructions. Keep both blocks and
1262*61046927SAndroid Build Coastguard Worker * insert prede before the second successor's terminator:
1263*61046927SAndroid Build Coastguard Worker * |--- i ----|
1264*61046927SAndroid Build Coastguard Worker * | ... |
1265*61046927SAndroid Build Coastguard Worker * | pred[tf] |
1266*61046927SAndroid Build Coastguard Worker * |----------|
1267*61046927SAndroid Build Coastguard Worker * succ0 / \ succ1
1268*61046927SAndroid Build Coastguard Worker * |-- i+1 ---| |-- i+2 ---|
1269*61046927SAndroid Build Coastguard Worker * | ... | | ... |
1270*61046927SAndroid Build Coastguard Worker * | pred[ft] | | prede |
1271*61046927SAndroid Build Coastguard Worker * |----------| |----------|
1272*61046927SAndroid Build Coastguard Worker * succ0 \ / succ0
1273*61046927SAndroid Build Coastguard Worker * |--- j ----|
1274*61046927SAndroid Build Coastguard Worker * | ... |
1275*61046927SAndroid Build Coastguard Worker * |----------|
1276*61046927SAndroid Build Coastguard Worker */
1277*61046927SAndroid Build Coastguard Worker if (!list_is_empty(&succ1->instr_list)) {
1278*61046927SAndroid Build Coastguard Worker ir3_PREDE(succ1);
1279*61046927SAndroid Build Coastguard Worker continue;
1280*61046927SAndroid Build Coastguard Worker }
1281*61046927SAndroid Build Coastguard Worker
1282*61046927SAndroid Build Coastguard Worker /* Second successor is empty so we can remove it:
1283*61046927SAndroid Build Coastguard Worker * |--- i ----|
1284*61046927SAndroid Build Coastguard Worker * | ... |
1285*61046927SAndroid Build Coastguard Worker * | pred[tf] |
1286*61046927SAndroid Build Coastguard Worker * |----------|
1287*61046927SAndroid Build Coastguard Worker * succ0 / \ succ1
1288*61046927SAndroid Build Coastguard Worker * |-- i+1 ---| |
1289*61046927SAndroid Build Coastguard Worker * | ... | |
1290*61046927SAndroid Build Coastguard Worker * | prede | |
1291*61046927SAndroid Build Coastguard Worker * |----------| |
1292*61046927SAndroid Build Coastguard Worker * succ0 \ /
1293*61046927SAndroid Build Coastguard Worker * |--- j ----|
1294*61046927SAndroid Build Coastguard Worker * | ... |
1295*61046927SAndroid Build Coastguard Worker * |----------|
1296*61046927SAndroid Build Coastguard Worker */
1297*61046927SAndroid Build Coastguard Worker list_delinit(&succ0_terminator->node);
1298*61046927SAndroid Build Coastguard Worker ir3_PREDE(succ0);
1299*61046927SAndroid Build Coastguard Worker remove_unused_block(succ1);
1300*61046927SAndroid Build Coastguard Worker block->successors[1] = succ0->successors[0];
1301*61046927SAndroid Build Coastguard Worker ir3_block_add_predecessor(succ0->successors[0], block);
1302*61046927SAndroid Build Coastguard Worker }
1303*61046927SAndroid Build Coastguard Worker }
1304*61046927SAndroid Build Coastguard Worker
1305*61046927SAndroid Build Coastguard Worker /* Here we workaround the fact that kill doesn't actually kill the thread as
1306*61046927SAndroid Build Coastguard Worker * GL expects. The last instruction always needs to be an end instruction,
1307*61046927SAndroid Build Coastguard Worker * which means that if we're stuck in a loop where kill is the only way out,
1308*61046927SAndroid Build Coastguard Worker * then we may have to jump out to the end. kill may also have the d3d
1309*61046927SAndroid Build Coastguard Worker * semantics of converting the thread to a helper thread, rather than setting
1310*61046927SAndroid Build Coastguard Worker * the exec mask to 0, in which case the helper thread could get stuck in an
1311*61046927SAndroid Build Coastguard Worker * infinite loop.
1312*61046927SAndroid Build Coastguard Worker *
1313*61046927SAndroid Build Coastguard Worker * We do this late, both to give the scheduler the opportunity to reschedule
1314*61046927SAndroid Build Coastguard Worker * kill instructions earlier and to avoid having to create a separate basic
1315*61046927SAndroid Build Coastguard Worker * block.
1316*61046927SAndroid Build Coastguard Worker *
1317*61046927SAndroid Build Coastguard Worker * TODO: Assuming that the wavefront doesn't stop as soon as all threads are
1318*61046927SAndroid Build Coastguard Worker * killed, we might benefit by doing this more aggressively when the remaining
1319*61046927SAndroid Build Coastguard Worker * part of the program after the kill is large, since that would let us
1320*61046927SAndroid Build Coastguard Worker * skip over the instructions when there are no non-killed threads left.
1321*61046927SAndroid Build Coastguard Worker */
1322*61046927SAndroid Build Coastguard Worker static void
kill_sched(struct ir3 * ir,struct ir3_shader_variant * so)1323*61046927SAndroid Build Coastguard Worker kill_sched(struct ir3 *ir, struct ir3_shader_variant *so)
1324*61046927SAndroid Build Coastguard Worker {
1325*61046927SAndroid Build Coastguard Worker ir3_count_instructions(ir);
1326*61046927SAndroid Build Coastguard Worker
1327*61046927SAndroid Build Coastguard Worker /* True if we know that this block will always eventually lead to the end
1328*61046927SAndroid Build Coastguard Worker * block:
1329*61046927SAndroid Build Coastguard Worker */
1330*61046927SAndroid Build Coastguard Worker bool always_ends = true;
1331*61046927SAndroid Build Coastguard Worker bool added = false;
1332*61046927SAndroid Build Coastguard Worker struct ir3_block *last_block =
1333*61046927SAndroid Build Coastguard Worker list_last_entry(&ir->block_list, struct ir3_block, node);
1334*61046927SAndroid Build Coastguard Worker
1335*61046927SAndroid Build Coastguard Worker foreach_block_rev (block, &ir->block_list) {
1336*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < 2 && block->successors[i]; i++) {
1337*61046927SAndroid Build Coastguard Worker if (block->successors[i]->start_ip <= block->end_ip)
1338*61046927SAndroid Build Coastguard Worker always_ends = false;
1339*61046927SAndroid Build Coastguard Worker }
1340*61046927SAndroid Build Coastguard Worker
1341*61046927SAndroid Build Coastguard Worker if (always_ends)
1342*61046927SAndroid Build Coastguard Worker continue;
1343*61046927SAndroid Build Coastguard Worker
1344*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &block->instr_list) {
1345*61046927SAndroid Build Coastguard Worker if (instr->opc != OPC_KILL)
1346*61046927SAndroid Build Coastguard Worker continue;
1347*61046927SAndroid Build Coastguard Worker
1348*61046927SAndroid Build Coastguard Worker struct ir3_instruction *br = ir3_instr_create(block, OPC_BR, 0, 1);
1349*61046927SAndroid Build Coastguard Worker ir3_src_create(br, instr->srcs[0]->num, instr->srcs[0]->flags)->wrmask =
1350*61046927SAndroid Build Coastguard Worker 1;
1351*61046927SAndroid Build Coastguard Worker br->cat0.target =
1352*61046927SAndroid Build Coastguard Worker list_last_entry(&ir->block_list, struct ir3_block, node);
1353*61046927SAndroid Build Coastguard Worker
1354*61046927SAndroid Build Coastguard Worker list_del(&br->node);
1355*61046927SAndroid Build Coastguard Worker list_add(&br->node, &instr->node);
1356*61046927SAndroid Build Coastguard Worker
1357*61046927SAndroid Build Coastguard Worker added = true;
1358*61046927SAndroid Build Coastguard Worker }
1359*61046927SAndroid Build Coastguard Worker }
1360*61046927SAndroid Build Coastguard Worker
1361*61046927SAndroid Build Coastguard Worker if (added) {
1362*61046927SAndroid Build Coastguard Worker /* I'm not entirely sure how the branchstack works, but we probably
1363*61046927SAndroid Build Coastguard Worker * need to add at least one entry for the divergence which is resolved
1364*61046927SAndroid Build Coastguard Worker * at the end:
1365*61046927SAndroid Build Coastguard Worker */
1366*61046927SAndroid Build Coastguard Worker so->branchstack++;
1367*61046927SAndroid Build Coastguard Worker
1368*61046927SAndroid Build Coastguard Worker /* We don't update predecessors/successors, so we have to do this
1369*61046927SAndroid Build Coastguard Worker * manually:
1370*61046927SAndroid Build Coastguard Worker */
1371*61046927SAndroid Build Coastguard Worker mark_jp(last_block);
1372*61046927SAndroid Build Coastguard Worker }
1373*61046927SAndroid Build Coastguard Worker }
1374*61046927SAndroid Build Coastguard Worker
1375*61046927SAndroid Build Coastguard Worker static void
dbg_sync_sched(struct ir3 * ir,struct ir3_shader_variant * so)1376*61046927SAndroid Build Coastguard Worker dbg_sync_sched(struct ir3 *ir, struct ir3_shader_variant *so)
1377*61046927SAndroid Build Coastguard Worker {
1378*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1379*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &block->instr_list) {
1380*61046927SAndroid Build Coastguard Worker if (is_ss_producer(instr) || is_sy_producer(instr)) {
1381*61046927SAndroid Build Coastguard Worker struct ir3_instruction *nop = ir3_NOP(block);
1382*61046927SAndroid Build Coastguard Worker nop->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
1383*61046927SAndroid Build Coastguard Worker ir3_instr_move_after(nop, instr);
1384*61046927SAndroid Build Coastguard Worker }
1385*61046927SAndroid Build Coastguard Worker }
1386*61046927SAndroid Build Coastguard Worker }
1387*61046927SAndroid Build Coastguard Worker }
1388*61046927SAndroid Build Coastguard Worker
1389*61046927SAndroid Build Coastguard Worker static void
dbg_nop_sched(struct ir3 * ir,struct ir3_shader_variant * so)1390*61046927SAndroid Build Coastguard Worker dbg_nop_sched(struct ir3 *ir, struct ir3_shader_variant *so)
1391*61046927SAndroid Build Coastguard Worker {
1392*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1393*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &block->instr_list) {
1394*61046927SAndroid Build Coastguard Worker struct ir3_instruction *nop = ir3_NOP(block);
1395*61046927SAndroid Build Coastguard Worker nop->repeat = 5;
1396*61046927SAndroid Build Coastguard Worker ir3_instr_move_before(nop, instr);
1397*61046927SAndroid Build Coastguard Worker }
1398*61046927SAndroid Build Coastguard Worker }
1399*61046927SAndroid Build Coastguard Worker }
1400*61046927SAndroid Build Coastguard Worker
1401*61046927SAndroid Build Coastguard Worker static void
dbg_expand_rpt(struct ir3 * ir)1402*61046927SAndroid Build Coastguard Worker dbg_expand_rpt(struct ir3 *ir)
1403*61046927SAndroid Build Coastguard Worker {
1404*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1405*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &block->instr_list) {
1406*61046927SAndroid Build Coastguard Worker if (instr->repeat == 0 || instr->opc == OPC_NOP ||
1407*61046927SAndroid Build Coastguard Worker instr->opc == OPC_SWZ || instr->opc == OPC_GAT ||
1408*61046927SAndroid Build Coastguard Worker instr->opc == OPC_SCT) {
1409*61046927SAndroid Build Coastguard Worker continue;
1410*61046927SAndroid Build Coastguard Worker }
1411*61046927SAndroid Build Coastguard Worker
1412*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i <= instr->repeat; ++i) {
1413*61046927SAndroid Build Coastguard Worker struct ir3_instruction *rpt = ir3_instr_clone(instr);
1414*61046927SAndroid Build Coastguard Worker ir3_instr_move_before(rpt, instr);
1415*61046927SAndroid Build Coastguard Worker rpt->repeat = 0;
1416*61046927SAndroid Build Coastguard Worker
1417*61046927SAndroid Build Coastguard Worker foreach_dst (dst, rpt) {
1418*61046927SAndroid Build Coastguard Worker dst->num += i;
1419*61046927SAndroid Build Coastguard Worker dst->wrmask = 1;
1420*61046927SAndroid Build Coastguard Worker }
1421*61046927SAndroid Build Coastguard Worker
1422*61046927SAndroid Build Coastguard Worker foreach_src (src, rpt) {
1423*61046927SAndroid Build Coastguard Worker if (!(src->flags & IR3_REG_R))
1424*61046927SAndroid Build Coastguard Worker continue;
1425*61046927SAndroid Build Coastguard Worker
1426*61046927SAndroid Build Coastguard Worker src->num += i;
1427*61046927SAndroid Build Coastguard Worker src->uim_val += i;
1428*61046927SAndroid Build Coastguard Worker src->wrmask = 1;
1429*61046927SAndroid Build Coastguard Worker src->flags &= ~IR3_REG_R;
1430*61046927SAndroid Build Coastguard Worker }
1431*61046927SAndroid Build Coastguard Worker }
1432*61046927SAndroid Build Coastguard Worker
1433*61046927SAndroid Build Coastguard Worker list_delinit(&instr->node);
1434*61046927SAndroid Build Coastguard Worker }
1435*61046927SAndroid Build Coastguard Worker }
1436*61046927SAndroid Build Coastguard Worker }
1437*61046927SAndroid Build Coastguard Worker
/* Per-block state for the helper-invocation analysis in helper_sched(). */
struct ir3_helper_block_data {
   /* True if helper invocations may be used on any path that starts at the
    * beginning of this block.
    */
   bool uses_helpers_beginning;

   /* True if helper invocations may still be used by the end of this block.
    * Branch instructions count as sitting "between" blocks, because (eq) has
    * to be inserted after them, in the successor blocks; a branch that uses
    * helpers therefore yields uses_helpers_end = true for its own block.
    */
   bool uses_helpers_end;
};
1451*61046927SAndroid Build Coastguard Worker
1452*61046927SAndroid Build Coastguard Worker /* Insert (eq) after the last instruction using the results of helper
1453*61046927SAndroid Build Coastguard Worker * invocations. Use a backwards dataflow analysis to determine at which points
1454*61046927SAndroid Build Coastguard Worker * in the program helper invocations are definitely never used, and then insert
1455*61046927SAndroid Build Coastguard Worker * (eq) at the point where we cross from a point where they may be used to a
1456*61046927SAndroid Build Coastguard Worker * point where they are never used.
1457*61046927SAndroid Build Coastguard Worker */
1458*61046927SAndroid Build Coastguard Worker static void
helper_sched(struct ir3_legalize_ctx * ctx,struct ir3 * ir,struct ir3_shader_variant * so)1459*61046927SAndroid Build Coastguard Worker helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
1460*61046927SAndroid Build Coastguard Worker struct ir3_shader_variant *so)
1461*61046927SAndroid Build Coastguard Worker {
1462*61046927SAndroid Build Coastguard Worker bool non_prefetch_helpers = false;
1463*61046927SAndroid Build Coastguard Worker
1464*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1465*61046927SAndroid Build Coastguard Worker struct ir3_helper_block_data *bd =
1466*61046927SAndroid Build Coastguard Worker rzalloc(ctx, struct ir3_helper_block_data);
1467*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
1468*61046927SAndroid Build Coastguard Worker if (uses_helpers(instr)) {
1469*61046927SAndroid Build Coastguard Worker bd->uses_helpers_beginning = true;
1470*61046927SAndroid Build Coastguard Worker if (instr->opc != OPC_META_TEX_PREFETCH) {
1471*61046927SAndroid Build Coastguard Worker non_prefetch_helpers = true;
1472*61046927SAndroid Build Coastguard Worker }
1473*61046927SAndroid Build Coastguard Worker }
1474*61046927SAndroid Build Coastguard Worker
1475*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_SHPE) {
1476*61046927SAndroid Build Coastguard Worker /* (eq) is not allowed in preambles, mark the whole preamble as
1477*61046927SAndroid Build Coastguard Worker * requiring helpers to avoid putting it there.
1478*61046927SAndroid Build Coastguard Worker */
1479*61046927SAndroid Build Coastguard Worker bd->uses_helpers_beginning = true;
1480*61046927SAndroid Build Coastguard Worker bd->uses_helpers_end = true;
1481*61046927SAndroid Build Coastguard Worker }
1482*61046927SAndroid Build Coastguard Worker }
1483*61046927SAndroid Build Coastguard Worker
1484*61046927SAndroid Build Coastguard Worker struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1485*61046927SAndroid Build Coastguard Worker if (terminator) {
1486*61046927SAndroid Build Coastguard Worker if (terminator->opc == OPC_BALL || terminator->opc == OPC_BANY ||
1487*61046927SAndroid Build Coastguard Worker (terminator->opc == OPC_GETONE &&
1488*61046927SAndroid Build Coastguard Worker (terminator->flags & IR3_INSTR_NEEDS_HELPERS))) {
1489*61046927SAndroid Build Coastguard Worker bd->uses_helpers_beginning = true;
1490*61046927SAndroid Build Coastguard Worker bd->uses_helpers_end = true;
1491*61046927SAndroid Build Coastguard Worker non_prefetch_helpers = true;
1492*61046927SAndroid Build Coastguard Worker }
1493*61046927SAndroid Build Coastguard Worker }
1494*61046927SAndroid Build Coastguard Worker
1495*61046927SAndroid Build Coastguard Worker block->data = bd;
1496*61046927SAndroid Build Coastguard Worker }
1497*61046927SAndroid Build Coastguard Worker
1498*61046927SAndroid Build Coastguard Worker /* If only prefetches use helpers then we can disable them in the shader via
1499*61046927SAndroid Build Coastguard Worker * a register setting.
1500*61046927SAndroid Build Coastguard Worker */
1501*61046927SAndroid Build Coastguard Worker if (!non_prefetch_helpers) {
1502*61046927SAndroid Build Coastguard Worker so->prefetch_end_of_quad = true;
1503*61046927SAndroid Build Coastguard Worker return;
1504*61046927SAndroid Build Coastguard Worker }
1505*61046927SAndroid Build Coastguard Worker
1506*61046927SAndroid Build Coastguard Worker bool progress;
1507*61046927SAndroid Build Coastguard Worker do {
1508*61046927SAndroid Build Coastguard Worker progress = false;
1509*61046927SAndroid Build Coastguard Worker foreach_block_rev (block, &ir->block_list) {
1510*61046927SAndroid Build Coastguard Worker struct ir3_helper_block_data *bd = block->data;
1511*61046927SAndroid Build Coastguard Worker
1512*61046927SAndroid Build Coastguard Worker if (!bd->uses_helpers_beginning)
1513*61046927SAndroid Build Coastguard Worker continue;
1514*61046927SAndroid Build Coastguard Worker
1515*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
1516*61046927SAndroid Build Coastguard Worker struct ir3_block *pred = block->physical_predecessors[i];
1517*61046927SAndroid Build Coastguard Worker struct ir3_helper_block_data *pred_bd = pred->data;
1518*61046927SAndroid Build Coastguard Worker if (!pred_bd->uses_helpers_end) {
1519*61046927SAndroid Build Coastguard Worker pred_bd->uses_helpers_end = true;
1520*61046927SAndroid Build Coastguard Worker }
1521*61046927SAndroid Build Coastguard Worker if (!pred_bd->uses_helpers_beginning) {
1522*61046927SAndroid Build Coastguard Worker pred_bd->uses_helpers_beginning = true;
1523*61046927SAndroid Build Coastguard Worker progress = true;
1524*61046927SAndroid Build Coastguard Worker }
1525*61046927SAndroid Build Coastguard Worker }
1526*61046927SAndroid Build Coastguard Worker }
1527*61046927SAndroid Build Coastguard Worker } while (progress);
1528*61046927SAndroid Build Coastguard Worker
1529*61046927SAndroid Build Coastguard Worker /* Now, we need to determine the points where helper invocations become
1530*61046927SAndroid Build Coastguard Worker * unused.
1531*61046927SAndroid Build Coastguard Worker */
1532*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1533*61046927SAndroid Build Coastguard Worker struct ir3_helper_block_data *bd = block->data;
1534*61046927SAndroid Build Coastguard Worker if (bd->uses_helpers_end)
1535*61046927SAndroid Build Coastguard Worker continue;
1536*61046927SAndroid Build Coastguard Worker
1537*61046927SAndroid Build Coastguard Worker /* We need to check the predecessors because of situations with critical
1538*61046927SAndroid Build Coastguard Worker * edges like this that can occur after optimizing jumps:
1539*61046927SAndroid Build Coastguard Worker *
1540*61046927SAndroid Build Coastguard Worker * br p0.x, #endif
1541*61046927SAndroid Build Coastguard Worker * ...
1542*61046927SAndroid Build Coastguard Worker * sam ...
1543*61046927SAndroid Build Coastguard Worker * ...
1544*61046927SAndroid Build Coastguard Worker * endif:
1545*61046927SAndroid Build Coastguard Worker * ...
1546*61046927SAndroid Build Coastguard Worker * end
1547*61046927SAndroid Build Coastguard Worker *
1548*61046927SAndroid Build Coastguard Worker * The endif block will have uses_helpers_beginning = false and
1549*61046927SAndroid Build Coastguard Worker * uses_helpers_end = false, but because we jump to there from the
1550*61046927SAndroid Build Coastguard Worker * beginning of the if where uses_helpers_end = true, we still want to
1551*61046927SAndroid Build Coastguard Worker * add an (eq) at the beginning of the block:
1552*61046927SAndroid Build Coastguard Worker *
1553*61046927SAndroid Build Coastguard Worker * br p0.x, #endif
1554*61046927SAndroid Build Coastguard Worker * ...
1555*61046927SAndroid Build Coastguard Worker * sam ...
1556*61046927SAndroid Build Coastguard Worker * (eq)nop
1557*61046927SAndroid Build Coastguard Worker * ...
1558*61046927SAndroid Build Coastguard Worker * endif:
1559*61046927SAndroid Build Coastguard Worker * (eq)nop
1560*61046927SAndroid Build Coastguard Worker * ...
1561*61046927SAndroid Build Coastguard Worker * end
1562*61046927SAndroid Build Coastguard Worker *
1563*61046927SAndroid Build Coastguard Worker * This an extra nop in the case where the branch isn't taken, but that's
1564*61046927SAndroid Build Coastguard Worker * probably preferable to adding an extra jump instruction which is what
1565*61046927SAndroid Build Coastguard Worker * would happen if we ran this pass before optimizing jumps:
1566*61046927SAndroid Build Coastguard Worker *
1567*61046927SAndroid Build Coastguard Worker * br p0.x, #else
1568*61046927SAndroid Build Coastguard Worker * ...
1569*61046927SAndroid Build Coastguard Worker * sam ...
1570*61046927SAndroid Build Coastguard Worker * (eq)nop
1571*61046927SAndroid Build Coastguard Worker * ...
1572*61046927SAndroid Build Coastguard Worker * jump #endif
1573*61046927SAndroid Build Coastguard Worker * else:
1574*61046927SAndroid Build Coastguard Worker * (eq)nop
1575*61046927SAndroid Build Coastguard Worker * endif:
1576*61046927SAndroid Build Coastguard Worker * ...
1577*61046927SAndroid Build Coastguard Worker * end
1578*61046927SAndroid Build Coastguard Worker *
1579*61046927SAndroid Build Coastguard Worker * We also need this to make sure we insert (eq) after branches which use
1580*61046927SAndroid Build Coastguard Worker * helper invocations.
1581*61046927SAndroid Build Coastguard Worker */
1582*61046927SAndroid Build Coastguard Worker bool pred_uses_helpers = bd->uses_helpers_beginning;
1583*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
1584*61046927SAndroid Build Coastguard Worker struct ir3_block *pred = block->physical_predecessors[i];
1585*61046927SAndroid Build Coastguard Worker struct ir3_helper_block_data *pred_bd = pred->data;
1586*61046927SAndroid Build Coastguard Worker if (pred_bd->uses_helpers_end) {
1587*61046927SAndroid Build Coastguard Worker pred_uses_helpers = true;
1588*61046927SAndroid Build Coastguard Worker break;
1589*61046927SAndroid Build Coastguard Worker }
1590*61046927SAndroid Build Coastguard Worker }
1591*61046927SAndroid Build Coastguard Worker
1592*61046927SAndroid Build Coastguard Worker if (!pred_uses_helpers)
1593*61046927SAndroid Build Coastguard Worker continue;
1594*61046927SAndroid Build Coastguard Worker
1595*61046927SAndroid Build Coastguard Worker /* The last use of helpers is somewhere between the beginning and the
1596*61046927SAndroid Build Coastguard Worker * end. first_instr will be the first instruction where helpers are no
1597*61046927SAndroid Build Coastguard Worker * longer required, or NULL if helpers are not required just at the end.
1598*61046927SAndroid Build Coastguard Worker */
1599*61046927SAndroid Build Coastguard Worker struct ir3_instruction *first_instr = NULL;
1600*61046927SAndroid Build Coastguard Worker foreach_instr_rev (instr, &block->instr_list) {
1601*61046927SAndroid Build Coastguard Worker /* Skip prefetches because they actually execute before the block
1602*61046927SAndroid Build Coastguard Worker * starts and at this stage they aren't guaranteed to be at the start
1603*61046927SAndroid Build Coastguard Worker * of the block.
1604*61046927SAndroid Build Coastguard Worker */
1605*61046927SAndroid Build Coastguard Worker if (uses_helpers(instr) && instr->opc != OPC_META_TEX_PREFETCH)
1606*61046927SAndroid Build Coastguard Worker break;
1607*61046927SAndroid Build Coastguard Worker first_instr = instr;
1608*61046927SAndroid Build Coastguard Worker }
1609*61046927SAndroid Build Coastguard Worker
1610*61046927SAndroid Build Coastguard Worker bool killed = false;
1611*61046927SAndroid Build Coastguard Worker bool expensive_instruction_in_block = false;
1612*61046927SAndroid Build Coastguard Worker if (first_instr) {
1613*61046927SAndroid Build Coastguard Worker foreach_instr_from (instr, first_instr, &block->instr_list) {
1614*61046927SAndroid Build Coastguard Worker /* If there's already a nop, we don't have to worry about whether to
1615*61046927SAndroid Build Coastguard Worker * insert one.
1616*61046927SAndroid Build Coastguard Worker */
1617*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_NOP) {
1618*61046927SAndroid Build Coastguard Worker instr->flags |= IR3_INSTR_EQ;
1619*61046927SAndroid Build Coastguard Worker killed = true;
1620*61046927SAndroid Build Coastguard Worker break;
1621*61046927SAndroid Build Coastguard Worker }
1622*61046927SAndroid Build Coastguard Worker
1623*61046927SAndroid Build Coastguard Worker /* ALU and SFU instructions probably aren't going to benefit much
1624*61046927SAndroid Build Coastguard Worker * from killing helper invocations, because they complete at least
1625*61046927SAndroid Build Coastguard Worker * an entire quad in a cycle and don't access any quad-divergent
1626*61046927SAndroid Build Coastguard Worker * memory, so delay emitting (eq) in the hopes that we find a nop
1627*61046927SAndroid Build Coastguard Worker * afterwards.
1628*61046927SAndroid Build Coastguard Worker */
1629*61046927SAndroid Build Coastguard Worker if (is_alu(instr) || is_sfu(instr))
1630*61046927SAndroid Build Coastguard Worker continue;
1631*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_PREDE)
1632*61046927SAndroid Build Coastguard Worker continue;
1633*61046927SAndroid Build Coastguard Worker
1634*61046927SAndroid Build Coastguard Worker expensive_instruction_in_block = true;
1635*61046927SAndroid Build Coastguard Worker break;
1636*61046927SAndroid Build Coastguard Worker }
1637*61046927SAndroid Build Coastguard Worker }
1638*61046927SAndroid Build Coastguard Worker
1639*61046927SAndroid Build Coastguard Worker /* If this block isn't the last block before the end instruction, assume
1640*61046927SAndroid Build Coastguard Worker * that there may be expensive instructions in later blocks so it's worth
1641*61046927SAndroid Build Coastguard Worker * it to insert a nop.
1642*61046927SAndroid Build Coastguard Worker */
1643*61046927SAndroid Build Coastguard Worker if (!killed && (expensive_instruction_in_block ||
1644*61046927SAndroid Build Coastguard Worker block->successors[0] != ir3_end_block(ir))) {
1645*61046927SAndroid Build Coastguard Worker struct ir3_instruction *nop = ir3_NOP(block);
1646*61046927SAndroid Build Coastguard Worker nop->flags |= IR3_INSTR_EQ;
1647*61046927SAndroid Build Coastguard Worker if (first_instr)
1648*61046927SAndroid Build Coastguard Worker ir3_instr_move_before(nop, first_instr);
1649*61046927SAndroid Build Coastguard Worker }
1650*61046927SAndroid Build Coastguard Worker }
1651*61046927SAndroid Build Coastguard Worker }
1652*61046927SAndroid Build Coastguard Worker
1653*61046927SAndroid Build Coastguard Worker bool
ir3_legalize(struct ir3 * ir,struct ir3_shader_variant * so,int * max_bary)1654*61046927SAndroid Build Coastguard Worker ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
1655*61046927SAndroid Build Coastguard Worker {
1656*61046927SAndroid Build Coastguard Worker struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
1657*61046927SAndroid Build Coastguard Worker bool mergedregs = so->mergedregs;
1658*61046927SAndroid Build Coastguard Worker bool progress;
1659*61046927SAndroid Build Coastguard Worker
1660*61046927SAndroid Build Coastguard Worker ctx->so = so;
1661*61046927SAndroid Build Coastguard Worker ctx->max_bary = -1;
1662*61046927SAndroid Build Coastguard Worker ctx->compiler = ir->compiler;
1663*61046927SAndroid Build Coastguard Worker ctx->type = ir->type;
1664*61046927SAndroid Build Coastguard Worker
1665*61046927SAndroid Build Coastguard Worker /* allocate per-block data: */
1666*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1667*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data *bd =
1668*61046927SAndroid Build Coastguard Worker rzalloc(ctx, struct ir3_legalize_block_data);
1669*61046927SAndroid Build Coastguard Worker
1670*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss_war, mergedregs);
1671*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss_or_sy_war, mergedregs);
1672*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss_scalar_war, mergedregs);
1673*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss_or_sy_scalar_war, mergedregs);
1674*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss_scalar_full, mergedregs);
1675*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss_scalar_half, mergedregs);
1676*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_ss, mergedregs);
1677*61046927SAndroid Build Coastguard Worker regmask_init(&bd->state.needs_sy, mergedregs);
1678*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss_war, mergedregs);
1679*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss_or_sy_war, mergedregs);
1680*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss_scalar_war, mergedregs);
1681*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss_or_sy_scalar_war, mergedregs);
1682*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss_scalar_full, mergedregs);
1683*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss_scalar_half, mergedregs);
1684*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_ss, mergedregs);
1685*61046927SAndroid Build Coastguard Worker regmask_init(&bd->begin_state.needs_sy, mergedregs);
1686*61046927SAndroid Build Coastguard Worker
1687*61046927SAndroid Build Coastguard Worker block->data = bd;
1688*61046927SAndroid Build Coastguard Worker }
1689*61046927SAndroid Build Coastguard Worker
/* We may have failed to pull all input loads into the first block.
 * In such a case, at the moment we aren't able to find a better place
 * for (ei) than the end of the program.
 * a5xx and a6xx do automatically release varying storage at the end.
 */
1695*61046927SAndroid Build Coastguard Worker ctx->early_input_release = true;
1696*61046927SAndroid Build Coastguard Worker
1697*61046927SAndroid Build Coastguard Worker struct ir3_block *start_block = ir3_after_preamble(ir);
1698*61046927SAndroid Build Coastguard Worker
1699*61046927SAndroid Build Coastguard Worker /* Gather information to determine whether we can enable early preamble.
1700*61046927SAndroid Build Coastguard Worker */
1701*61046927SAndroid Build Coastguard Worker bool gpr_in_preamble = false;
1702*61046927SAndroid Build Coastguard Worker bool pred_in_preamble = false;
1703*61046927SAndroid Build Coastguard Worker bool relative_in_preamble = false;
1704*61046927SAndroid Build Coastguard Worker bool in_preamble = start_block != ir3_start_block(ir);
1705*61046927SAndroid Build Coastguard Worker bool has_preamble = start_block != ir3_start_block(ir);
1706*61046927SAndroid Build Coastguard Worker
1707*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1708*61046927SAndroid Build Coastguard Worker if (block == start_block)
1709*61046927SAndroid Build Coastguard Worker in_preamble = false;
1710*61046927SAndroid Build Coastguard Worker
1711*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &block->instr_list) {
1712*61046927SAndroid Build Coastguard Worker if (is_input(instr)) {
1713*61046927SAndroid Build Coastguard Worker ctx->has_inputs = true;
1714*61046927SAndroid Build Coastguard Worker if (block != start_block) {
1715*61046927SAndroid Build Coastguard Worker ctx->early_input_release = false;
1716*61046927SAndroid Build Coastguard Worker }
1717*61046927SAndroid Build Coastguard Worker }
1718*61046927SAndroid Build Coastguard Worker
1719*61046927SAndroid Build Coastguard Worker if (is_meta(instr))
1720*61046927SAndroid Build Coastguard Worker continue;
1721*61046927SAndroid Build Coastguard Worker
1722*61046927SAndroid Build Coastguard Worker foreach_src (reg, instr) {
1723*61046927SAndroid Build Coastguard Worker if (in_preamble) {
1724*61046927SAndroid Build Coastguard Worker if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED)) &&
1725*61046927SAndroid Build Coastguard Worker is_reg_gpr(reg))
1726*61046927SAndroid Build Coastguard Worker gpr_in_preamble = true;
1727*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_RELATIV)
1728*61046927SAndroid Build Coastguard Worker relative_in_preamble = true;
1729*61046927SAndroid Build Coastguard Worker }
1730*61046927SAndroid Build Coastguard Worker }
1731*61046927SAndroid Build Coastguard Worker
1732*61046927SAndroid Build Coastguard Worker foreach_dst (reg, instr) {
1733*61046927SAndroid Build Coastguard Worker if (is_dest_gpr(reg)) {
1734*61046927SAndroid Build Coastguard Worker if (in_preamble) {
1735*61046927SAndroid Build Coastguard Worker if (!(reg->flags & IR3_REG_SHARED))
1736*61046927SAndroid Build Coastguard Worker gpr_in_preamble = true;
1737*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_RELATIV)
1738*61046927SAndroid Build Coastguard Worker relative_in_preamble = true;
1739*61046927SAndroid Build Coastguard Worker }
1740*61046927SAndroid Build Coastguard Worker }
1741*61046927SAndroid Build Coastguard Worker }
1742*61046927SAndroid Build Coastguard Worker
1743*61046927SAndroid Build Coastguard Worker if (in_preamble && writes_pred(instr)) {
1744*61046927SAndroid Build Coastguard Worker pred_in_preamble = true;
1745*61046927SAndroid Build Coastguard Worker }
1746*61046927SAndroid Build Coastguard Worker }
1747*61046927SAndroid Build Coastguard Worker }
1748*61046927SAndroid Build Coastguard Worker
1749*61046927SAndroid Build Coastguard Worker so->early_preamble = has_preamble && !gpr_in_preamble &&
1750*61046927SAndroid Build Coastguard Worker !pred_in_preamble && !relative_in_preamble &&
1751*61046927SAndroid Build Coastguard Worker ir->compiler->has_early_preamble &&
1752*61046927SAndroid Build Coastguard Worker !(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE);
1753*61046927SAndroid Build Coastguard Worker
1754*61046927SAndroid Build Coastguard Worker /* On a7xx, sync behavior for a1.x is different in the early preamble. RaW
1755*61046927SAndroid Build Coastguard Worker * dependencies must be synchronized with (ss), and there must be an extra
1756*61046927SAndroid Build Coastguard Worker * (r) on the source of the mova1 instruction.
1757*61046927SAndroid Build Coastguard Worker */
1758*61046927SAndroid Build Coastguard Worker if (so->early_preamble && ir->compiler->gen >= 7) {
1759*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1760*61046927SAndroid Build Coastguard Worker if (block == start_block)
1761*61046927SAndroid Build Coastguard Worker break;
1762*61046927SAndroid Build Coastguard Worker block->in_early_preamble = true;
1763*61046927SAndroid Build Coastguard Worker }
1764*61046927SAndroid Build Coastguard Worker }
1765*61046927SAndroid Build Coastguard Worker
1766*61046927SAndroid Build Coastguard Worker assert(ctx->early_input_release || ctx->compiler->gen >= 5);
1767*61046927SAndroid Build Coastguard Worker
1768*61046927SAndroid Build Coastguard Worker if (ir3_shader_debug & IR3_DBG_EXPANDRPT) {
1769*61046927SAndroid Build Coastguard Worker dbg_expand_rpt(ir);
1770*61046927SAndroid Build Coastguard Worker }
1771*61046927SAndroid Build Coastguard Worker
1772*61046927SAndroid Build Coastguard Worker /* process each block: */
1773*61046927SAndroid Build Coastguard Worker do {
1774*61046927SAndroid Build Coastguard Worker progress = false;
1775*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1776*61046927SAndroid Build Coastguard Worker progress |= legalize_block(ctx, block);
1777*61046927SAndroid Build Coastguard Worker }
1778*61046927SAndroid Build Coastguard Worker } while (progress);
1779*61046927SAndroid Build Coastguard Worker
1780*61046927SAndroid Build Coastguard Worker *max_bary = ctx->max_bary;
1781*61046927SAndroid Build Coastguard Worker
1782*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1783*61046927SAndroid Build Coastguard Worker struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1784*61046927SAndroid Build Coastguard Worker if (terminator && terminator->opc == OPC_GETONE) {
1785*61046927SAndroid Build Coastguard Worker apply_push_consts_load_macro(ctx, block->successors[0]);
1786*61046927SAndroid Build Coastguard Worker break;
1787*61046927SAndroid Build Coastguard Worker }
1788*61046927SAndroid Build Coastguard Worker }
1789*61046927SAndroid Build Coastguard Worker
1790*61046927SAndroid Build Coastguard Worker block_sched(ir);
1791*61046927SAndroid Build Coastguard Worker
1792*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1793*61046927SAndroid Build Coastguard Worker progress |= apply_fine_deriv_macro(ctx, block);
1794*61046927SAndroid Build Coastguard Worker }
1795*61046927SAndroid Build Coastguard Worker
1796*61046927SAndroid Build Coastguard Worker if (ir3_shader_debug & IR3_DBG_FULLSYNC) {
1797*61046927SAndroid Build Coastguard Worker dbg_sync_sched(ir, so);
1798*61046927SAndroid Build Coastguard Worker }
1799*61046927SAndroid Build Coastguard Worker
1800*61046927SAndroid Build Coastguard Worker if (ir3_shader_debug & IR3_DBG_FULLNOP) {
1801*61046927SAndroid Build Coastguard Worker dbg_nop_sched(ir, so);
1802*61046927SAndroid Build Coastguard Worker }
1803*61046927SAndroid Build Coastguard Worker
1804*61046927SAndroid Build Coastguard Worker bool cfg_changed = false;
1805*61046927SAndroid Build Coastguard Worker while (opt_jump(ir))
1806*61046927SAndroid Build Coastguard Worker cfg_changed = true;
1807*61046927SAndroid Build Coastguard Worker
1808*61046927SAndroid Build Coastguard Worker prede_sched(ir);
1809*61046927SAndroid Build Coastguard Worker
1810*61046927SAndroid Build Coastguard Worker if (cfg_changed)
1811*61046927SAndroid Build Coastguard Worker ir3_calc_reconvergence(so);
1812*61046927SAndroid Build Coastguard Worker
1813*61046927SAndroid Build Coastguard Worker if (so->type == MESA_SHADER_FRAGMENT)
1814*61046927SAndroid Build Coastguard Worker kill_sched(ir, so);
1815*61046927SAndroid Build Coastguard Worker
1816*61046927SAndroid Build Coastguard Worker /* TODO: does (eq) exist before a6xx? */
1817*61046927SAndroid Build Coastguard Worker if (so->type == MESA_SHADER_FRAGMENT && so->need_pixlod &&
1818*61046927SAndroid Build Coastguard Worker so->compiler->gen >= 6)
1819*61046927SAndroid Build Coastguard Worker helper_sched(ctx, ir, so);
1820*61046927SAndroid Build Coastguard Worker
1821*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
1822*61046927SAndroid Build Coastguard Worker progress |= expand_dummy_dests(block);
1823*61046927SAndroid Build Coastguard Worker }
1824*61046927SAndroid Build Coastguard Worker
1825*61046927SAndroid Build Coastguard Worker ir3_count_instructions(ir);
1826*61046927SAndroid Build Coastguard Worker resolve_jumps(ir);
1827*61046927SAndroid Build Coastguard Worker
1828*61046927SAndroid Build Coastguard Worker mark_xvergence_points(ir);
1829*61046927SAndroid Build Coastguard Worker
1830*61046927SAndroid Build Coastguard Worker ralloc_free(ctx);
1831*61046927SAndroid Build Coastguard Worker
1832*61046927SAndroid Build Coastguard Worker return true;
1833*61046927SAndroid Build Coastguard Worker }
1834