xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_legalize.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2014 Rob Clark <[email protected]>
3*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker  *
5*61046927SAndroid Build Coastguard Worker  * Authors:
6*61046927SAndroid Build Coastguard Worker  *    Rob Clark <[email protected]>
7*61046927SAndroid Build Coastguard Worker  */
8*61046927SAndroid Build Coastguard Worker 
9*61046927SAndroid Build Coastguard Worker #include "util/ralloc.h"
10*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
11*61046927SAndroid Build Coastguard Worker 
12*61046927SAndroid Build Coastguard Worker #include "ir3.h"
13*61046927SAndroid Build Coastguard Worker #include "ir3_shader.h"
14*61046927SAndroid Build Coastguard Worker 
15*61046927SAndroid Build Coastguard Worker /*
16*61046927SAndroid Build Coastguard Worker  * Legalize:
17*61046927SAndroid Build Coastguard Worker  *
18*61046927SAndroid Build Coastguard Worker  * The legalize pass handles ensuring sufficient nop's and sync flags for
19*61046927SAndroid Build Coastguard Worker  * correct execution.
20*61046927SAndroid Build Coastguard Worker  *
21*61046927SAndroid Build Coastguard Worker  * 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
22*61046927SAndroid Build Coastguard Worker  *    based on state flowing out of predecessor blocks until there is
23*61046927SAndroid Build Coastguard Worker  *    no further change.  In some cases this requires inserting nops.
24*61046927SAndroid Build Coastguard Worker  * 2) Mark (ei) on last varying input
25*61046927SAndroid Build Coastguard Worker  * 3) Final nop scheduling for instruction latency
26*61046927SAndroid Build Coastguard Worker  * 4) Resolve jumps and schedule blocks, marking potential convergence
27*61046927SAndroid Build Coastguard Worker  *    points with (jp)
28*61046927SAndroid Build Coastguard Worker  */
29*61046927SAndroid Build Coastguard Worker 
30*61046927SAndroid Build Coastguard Worker struct ir3_legalize_ctx {
31*61046927SAndroid Build Coastguard Worker    struct ir3_compiler *compiler;
32*61046927SAndroid Build Coastguard Worker    struct ir3_shader_variant *so;
33*61046927SAndroid Build Coastguard Worker    gl_shader_stage type;
34*61046927SAndroid Build Coastguard Worker    int max_bary;
35*61046927SAndroid Build Coastguard Worker    bool early_input_release;
36*61046927SAndroid Build Coastguard Worker    bool has_inputs;
37*61046927SAndroid Build Coastguard Worker    bool has_tex_prefetch;
38*61046927SAndroid Build Coastguard Worker };
39*61046927SAndroid Build Coastguard Worker 
40*61046927SAndroid Build Coastguard Worker struct ir3_nop_state {
41*61046927SAndroid Build Coastguard Worker    unsigned full_ready[GPR_REG_SIZE];
42*61046927SAndroid Build Coastguard Worker    unsigned half_ready[GPR_REG_SIZE];
43*61046927SAndroid Build Coastguard Worker };
44*61046927SAndroid Build Coastguard Worker 
45*61046927SAndroid Build Coastguard Worker struct ir3_legalize_state {
46*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss;
47*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss_scalar_full; /* half scalar ALU producer -> full scalar ALU consumer */
48*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss_scalar_half; /* full scalar ALU producer -> half scalar ALU consumer */
49*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss_war; /* write after read */
50*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss_or_sy_war;  /* WAR for sy-producer sources */
51*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss_scalar_war; /* scalar ALU write -> ALU write */
52*61046927SAndroid Build Coastguard Worker    regmask_t needs_ss_or_sy_scalar_war;
53*61046927SAndroid Build Coastguard Worker    regmask_t needs_sy;
54*61046927SAndroid Build Coastguard Worker    bool needs_ss_for_const;
55*61046927SAndroid Build Coastguard Worker 
56*61046927SAndroid Build Coastguard Worker    /* Each of these arrays contains the cycle when the corresponding register
57*61046927SAndroid Build Coastguard Worker     * becomes "ready" i.e. does not require any more nops. There is a special
58*61046927SAndroid Build Coastguard Worker     * mechanism to let ALU instructions read compatible (i.e. same halfness)
59*61046927SAndroid Build Coastguard Worker     * destinations of another ALU instruction with less delay, so this can
60*61046927SAndroid Build Coastguard Worker     * depend on what type the consuming instruction is, which is why there are
61*61046927SAndroid Build Coastguard Worker     * multiple arrays. The cycle is counted relative to the start of the block.
62*61046927SAndroid Build Coastguard Worker     */
63*61046927SAndroid Build Coastguard Worker 
64*61046927SAndroid Build Coastguard Worker    /* When ALU instructions reading the given full/half register will be ready.
65*61046927SAndroid Build Coastguard Worker     */
66*61046927SAndroid Build Coastguard Worker    struct ir3_nop_state alu_nop;
67*61046927SAndroid Build Coastguard Worker 
68*61046927SAndroid Build Coastguard Worker    /* When non-ALU (e.g. cat5) instructions reading the given full/half register
69*61046927SAndroid Build Coastguard Worker     * will be ready.
70*61046927SAndroid Build Coastguard Worker     */
71*61046927SAndroid Build Coastguard Worker    struct ir3_nop_state non_alu_nop;
72*61046927SAndroid Build Coastguard Worker 
73*61046927SAndroid Build Coastguard Worker    /* When p0.x-w, a0.x, and a1.x are ready. */
74*61046927SAndroid Build Coastguard Worker    unsigned pred_ready[4];
75*61046927SAndroid Build Coastguard Worker    unsigned addr_ready[2];
76*61046927SAndroid Build Coastguard Worker };
77*61046927SAndroid Build Coastguard Worker 
78*61046927SAndroid Build Coastguard Worker struct ir3_legalize_block_data {
79*61046927SAndroid Build Coastguard Worker    bool valid;
80*61046927SAndroid Build Coastguard Worker    struct ir3_legalize_state begin_state;
81*61046927SAndroid Build Coastguard Worker    struct ir3_legalize_state state;
82*61046927SAndroid Build Coastguard Worker };
83*61046927SAndroid Build Coastguard Worker 
84*61046927SAndroid Build Coastguard Worker static inline bool
needs_ss_war(struct ir3_legalize_state * state,struct ir3_register * dst,bool is_scalar_alu)85*61046927SAndroid Build Coastguard Worker needs_ss_war(struct ir3_legalize_state *state, struct ir3_register *dst,
86*61046927SAndroid Build Coastguard Worker              bool is_scalar_alu)
87*61046927SAndroid Build Coastguard Worker {
88*61046927SAndroid Build Coastguard Worker    if (regmask_get(&state->needs_ss_war, dst))
89*61046927SAndroid Build Coastguard Worker       return true;
90*61046927SAndroid Build Coastguard Worker    if (regmask_get(&state->needs_ss_or_sy_war, dst))
91*61046927SAndroid Build Coastguard Worker       return true;
92*61046927SAndroid Build Coastguard Worker 
93*61046927SAndroid Build Coastguard Worker    if (!is_scalar_alu) {
94*61046927SAndroid Build Coastguard Worker       if (regmask_get(&state->needs_ss_scalar_war, dst))
95*61046927SAndroid Build Coastguard Worker          return true;
96*61046927SAndroid Build Coastguard Worker       if (regmask_get(&state->needs_ss_or_sy_scalar_war, dst))
97*61046927SAndroid Build Coastguard Worker          return true;
98*61046927SAndroid Build Coastguard Worker    }
99*61046927SAndroid Build Coastguard Worker 
100*61046927SAndroid Build Coastguard Worker    return false;
101*61046927SAndroid Build Coastguard Worker }
102*61046927SAndroid Build Coastguard Worker 
103*61046927SAndroid Build Coastguard Worker static inline void
apply_ss(struct ir3_instruction * instr,struct ir3_legalize_state * state,bool mergedregs)104*61046927SAndroid Build Coastguard Worker apply_ss(struct ir3_instruction *instr,
105*61046927SAndroid Build Coastguard Worker          struct ir3_legalize_state *state,
106*61046927SAndroid Build Coastguard Worker          bool mergedregs)
107*61046927SAndroid Build Coastguard Worker {
108*61046927SAndroid Build Coastguard Worker    instr->flags |= IR3_INSTR_SS;
109*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_war, mergedregs);
110*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_or_sy_war, mergedregs);
111*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss, mergedregs);
112*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_scalar_war, mergedregs);
113*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs);
114*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_scalar_full, mergedregs);
115*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_scalar_half, mergedregs);
116*61046927SAndroid Build Coastguard Worker    state->needs_ss_for_const = false;
117*61046927SAndroid Build Coastguard Worker }
118*61046927SAndroid Build Coastguard Worker 
119*61046927SAndroid Build Coastguard Worker static inline void
apply_sy(struct ir3_instruction * instr,struct ir3_legalize_state * state,bool mergedregs)120*61046927SAndroid Build Coastguard Worker apply_sy(struct ir3_instruction *instr,
121*61046927SAndroid Build Coastguard Worker          struct ir3_legalize_state *state,
122*61046927SAndroid Build Coastguard Worker          bool mergedregs)
123*61046927SAndroid Build Coastguard Worker {
124*61046927SAndroid Build Coastguard Worker    instr->flags |= IR3_INSTR_SY;
125*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_sy, mergedregs);
126*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_or_sy_war, mergedregs);
127*61046927SAndroid Build Coastguard Worker    regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs);
128*61046927SAndroid Build Coastguard Worker }
129*61046927SAndroid Build Coastguard Worker 
130*61046927SAndroid Build Coastguard Worker static bool
count_instruction(struct ir3_instruction * n,struct ir3_compiler * compiler)131*61046927SAndroid Build Coastguard Worker count_instruction(struct ir3_instruction *n, struct ir3_compiler *compiler)
132*61046927SAndroid Build Coastguard Worker {
133*61046927SAndroid Build Coastguard Worker    /* NOTE: don't count branch/jump since we don't know yet if they will
134*61046927SAndroid Build Coastguard Worker     * be eliminated later in resolve_jumps().. really should do that
135*61046927SAndroid Build Coastguard Worker     * earlier so we don't have this constraint.
136*61046927SAndroid Build Coastguard Worker     */
137*61046927SAndroid Build Coastguard Worker    return (is_alu(n) && !is_scalar_alu(n, compiler)) ||
138*61046927SAndroid Build Coastguard Worker       (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR) &&
139*61046927SAndroid Build Coastguard Worker            (n->opc != OPC_BRAA) && (n->opc != OPC_BRAO));
140*61046927SAndroid Build Coastguard Worker }
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker static unsigned *
get_ready_slot(struct ir3_legalize_state * state,struct ir3_register * reg,unsigned num,bool consumer_alu,bool matching_size)143*61046927SAndroid Build Coastguard Worker get_ready_slot(struct ir3_legalize_state *state,
144*61046927SAndroid Build Coastguard Worker                struct ir3_register *reg, unsigned num,
145*61046927SAndroid Build Coastguard Worker                bool consumer_alu, bool matching_size)
146*61046927SAndroid Build Coastguard Worker {
147*61046927SAndroid Build Coastguard Worker    if (reg->flags & IR3_REG_PREDICATE) {
148*61046927SAndroid Build Coastguard Worker       assert(num == reg->num);
149*61046927SAndroid Build Coastguard Worker       assert(reg_num(reg) == REG_P0);
150*61046927SAndroid Build Coastguard Worker       return &state->pred_ready[reg_comp(reg)];
151*61046927SAndroid Build Coastguard Worker    }
152*61046927SAndroid Build Coastguard Worker    if (reg->num == regid(REG_A0, 0))
153*61046927SAndroid Build Coastguard Worker       return &state->addr_ready[0];
154*61046927SAndroid Build Coastguard Worker    if (reg->num == regid(REG_A0, 1))
155*61046927SAndroid Build Coastguard Worker       return &state->addr_ready[1];
156*61046927SAndroid Build Coastguard Worker    struct ir3_nop_state *nop =
157*61046927SAndroid Build Coastguard Worker       consumer_alu ? &state->alu_nop : &state->non_alu_nop;
158*61046927SAndroid Build Coastguard Worker    assert(!(reg->flags & IR3_REG_SHARED));
159*61046927SAndroid Build Coastguard Worker    if (reg->flags & IR3_REG_HALF) {
160*61046927SAndroid Build Coastguard Worker       if (matching_size)
161*61046927SAndroid Build Coastguard Worker          return &nop->half_ready[num];
162*61046927SAndroid Build Coastguard Worker       else
163*61046927SAndroid Build Coastguard Worker          return &nop->full_ready[num / 2];
164*61046927SAndroid Build Coastguard Worker    } else {
165*61046927SAndroid Build Coastguard Worker       if (matching_size)
166*61046927SAndroid Build Coastguard Worker          return &nop->full_ready[num];
167*61046927SAndroid Build Coastguard Worker       /* If "num" is large enough, then it can't alias a half-reg because only
168*61046927SAndroid Build Coastguard Worker        * the first half of the full reg speace aliases half regs. Return NULL in
169*61046927SAndroid Build Coastguard Worker        * this case.
170*61046927SAndroid Build Coastguard Worker        */
171*61046927SAndroid Build Coastguard Worker       else if (num * 2 < ARRAY_SIZE(nop->half_ready))
172*61046927SAndroid Build Coastguard Worker          return &nop->half_ready[num * 2];
173*61046927SAndroid Build Coastguard Worker       else
174*61046927SAndroid Build Coastguard Worker          return NULL;
175*61046927SAndroid Build Coastguard Worker    }
176*61046927SAndroid Build Coastguard Worker }
177*61046927SAndroid Build Coastguard Worker 
178*61046927SAndroid Build Coastguard Worker static unsigned
delay_calc(struct ir3_legalize_state * state,struct ir3_instruction * instr,unsigned cycle)179*61046927SAndroid Build Coastguard Worker delay_calc(struct ir3_legalize_state *state,
180*61046927SAndroid Build Coastguard Worker            struct ir3_instruction *instr,
181*61046927SAndroid Build Coastguard Worker            unsigned cycle)
182*61046927SAndroid Build Coastguard Worker {
183*61046927SAndroid Build Coastguard Worker    /* As far as we know, shader outputs don't need any delay. */
184*61046927SAndroid Build Coastguard Worker    if (instr->opc == OPC_END || instr->opc == OPC_CHMASK)
185*61046927SAndroid Build Coastguard Worker       return 0;
186*61046927SAndroid Build Coastguard Worker 
187*61046927SAndroid Build Coastguard Worker    unsigned delay = 0;
188*61046927SAndroid Build Coastguard Worker    foreach_src_n (src, n, instr) {
189*61046927SAndroid Build Coastguard Worker       if (src->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SHARED))
190*61046927SAndroid Build Coastguard Worker          continue;
191*61046927SAndroid Build Coastguard Worker 
192*61046927SAndroid Build Coastguard Worker       unsigned elems = post_ra_reg_elems(src);
193*61046927SAndroid Build Coastguard Worker       unsigned num = post_ra_reg_num(src);
194*61046927SAndroid Build Coastguard Worker       unsigned src_cycle = cycle;
195*61046927SAndroid Build Coastguard Worker 
196*61046927SAndroid Build Coastguard Worker       /* gat and swz have scalar sources and each source is read in a
197*61046927SAndroid Build Coastguard Worker        * subsequent cycle.
198*61046927SAndroid Build Coastguard Worker        */
199*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_GAT || instr->opc == OPC_SWZ)
200*61046927SAndroid Build Coastguard Worker          src_cycle += n;
201*61046927SAndroid Build Coastguard Worker 
202*61046927SAndroid Build Coastguard Worker       /* cat3 instructions consume their last source two cycles later, so they
203*61046927SAndroid Build Coastguard Worker        * only need a delay of 1.
204*61046927SAndroid Build Coastguard Worker        */
205*61046927SAndroid Build Coastguard Worker       if ((is_mad(instr->opc) || is_madsh(instr->opc)) && n == 2)
206*61046927SAndroid Build Coastguard Worker          src_cycle += 2;
207*61046927SAndroid Build Coastguard Worker 
208*61046927SAndroid Build Coastguard Worker       for (unsigned elem = 0; elem < elems; elem++, num++) {
209*61046927SAndroid Build Coastguard Worker          unsigned ready_cycle =
210*61046927SAndroid Build Coastguard Worker             *get_ready_slot(state, src, num, is_alu(instr), true);
211*61046927SAndroid Build Coastguard Worker          delay = MAX2(delay, MAX2(ready_cycle, src_cycle) - src_cycle);
212*61046927SAndroid Build Coastguard Worker 
213*61046927SAndroid Build Coastguard Worker          /* Increment cycle for ALU instructions with (rptN) where sources are
214*61046927SAndroid Build Coastguard Worker           * read each subsequent cycle.
215*61046927SAndroid Build Coastguard Worker           */
216*61046927SAndroid Build Coastguard Worker          if (instr->repeat && !(src->flags & IR3_REG_RELATIV))
217*61046927SAndroid Build Coastguard Worker             src_cycle++;
218*61046927SAndroid Build Coastguard Worker       }
219*61046927SAndroid Build Coastguard Worker    }
220*61046927SAndroid Build Coastguard Worker 
221*61046927SAndroid Build Coastguard Worker    return delay;
222*61046927SAndroid Build Coastguard Worker }
223*61046927SAndroid Build Coastguard Worker 
224*61046927SAndroid Build Coastguard Worker static void
delay_update(struct ir3_legalize_state * state,struct ir3_instruction * instr,unsigned cycle,bool mergedregs)225*61046927SAndroid Build Coastguard Worker delay_update(struct ir3_legalize_state *state,
226*61046927SAndroid Build Coastguard Worker              struct ir3_instruction *instr,
227*61046927SAndroid Build Coastguard Worker              unsigned cycle,
228*61046927SAndroid Build Coastguard Worker              bool mergedregs)
229*61046927SAndroid Build Coastguard Worker {
230*61046927SAndroid Build Coastguard Worker    if (writes_addr1(instr) && instr->block->in_early_preamble)
231*61046927SAndroid Build Coastguard Worker       return;
232*61046927SAndroid Build Coastguard Worker 
233*61046927SAndroid Build Coastguard Worker    foreach_dst_n (dst, n, instr) {
234*61046927SAndroid Build Coastguard Worker       unsigned elems = post_ra_reg_elems(dst);
235*61046927SAndroid Build Coastguard Worker       unsigned num = post_ra_reg_num(dst);
236*61046927SAndroid Build Coastguard Worker       unsigned dst_cycle = cycle;
237*61046927SAndroid Build Coastguard Worker 
238*61046927SAndroid Build Coastguard Worker       /* sct and swz have scalar destinations and each destination is written in
239*61046927SAndroid Build Coastguard Worker        * a subsequent cycle.
240*61046927SAndroid Build Coastguard Worker        */
241*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_SCT || instr->opc == OPC_SWZ)
242*61046927SAndroid Build Coastguard Worker          dst_cycle += n;
243*61046927SAndroid Build Coastguard Worker 
244*61046927SAndroid Build Coastguard Worker       /* For relative accesses with (rptN), we have no way of knowing which
245*61046927SAndroid Build Coastguard Worker        * component is accessed when, so we have to assume the worst and mark
246*61046927SAndroid Build Coastguard Worker        * every array member as being written at the end.
247*61046927SAndroid Build Coastguard Worker        */
248*61046927SAndroid Build Coastguard Worker       if (dst->flags & IR3_REG_RELATIV)
249*61046927SAndroid Build Coastguard Worker          dst_cycle += instr->repeat;
250*61046927SAndroid Build Coastguard Worker 
251*61046927SAndroid Build Coastguard Worker       if (dst->flags & IR3_REG_SHARED)
252*61046927SAndroid Build Coastguard Worker          continue;
253*61046927SAndroid Build Coastguard Worker 
254*61046927SAndroid Build Coastguard Worker       for (unsigned elem = 0; elem < elems; elem++, num++) {
255*61046927SAndroid Build Coastguard Worker          for (unsigned consumer_alu = 0; consumer_alu < 2; consumer_alu++) {
256*61046927SAndroid Build Coastguard Worker             for (unsigned matching_size = 0; matching_size < 2; matching_size++) {
257*61046927SAndroid Build Coastguard Worker                unsigned *ready_slot =
258*61046927SAndroid Build Coastguard Worker                   get_ready_slot(state, dst, num, consumer_alu, matching_size);
259*61046927SAndroid Build Coastguard Worker 
260*61046927SAndroid Build Coastguard Worker                if (!ready_slot)
261*61046927SAndroid Build Coastguard Worker                   continue;
262*61046927SAndroid Build Coastguard Worker 
263*61046927SAndroid Build Coastguard Worker                bool reset_ready_slot = false;
264*61046927SAndroid Build Coastguard Worker                unsigned delay = 0;
265*61046927SAndroid Build Coastguard Worker                if (!is_alu(instr)) {
266*61046927SAndroid Build Coastguard Worker                   /* Apparently writes that require (ss) or (sy) are
267*61046927SAndroid Build Coastguard Worker                    * synchronized against previous writes, so consumers don't
268*61046927SAndroid Build Coastguard Worker                    * have to wait for any previous overlapping ALU instructions
269*61046927SAndroid Build Coastguard Worker                    * to complete.
270*61046927SAndroid Build Coastguard Worker                    */
271*61046927SAndroid Build Coastguard Worker                   reset_ready_slot = true;
272*61046927SAndroid Build Coastguard Worker                } else if ((dst->flags & IR3_REG_PREDICATE) ||
273*61046927SAndroid Build Coastguard Worker                           reg_num(dst) == REG_A0) {
274*61046927SAndroid Build Coastguard Worker                   delay = 6;
275*61046927SAndroid Build Coastguard Worker                   if (!matching_size)
276*61046927SAndroid Build Coastguard Worker                      continue;
277*61046927SAndroid Build Coastguard Worker                } else {
278*61046927SAndroid Build Coastguard Worker                   delay = (consumer_alu && matching_size) ? 3 : 6;
279*61046927SAndroid Build Coastguard Worker                }
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker                if (!matching_size) {
282*61046927SAndroid Build Coastguard Worker                   for (unsigned i = 0; i < reg_elem_size(dst); i++) {
283*61046927SAndroid Build Coastguard Worker                      ready_slot[i] =
284*61046927SAndroid Build Coastguard Worker                         reset_ready_slot ? 0 :
285*61046927SAndroid Build Coastguard Worker                         MAX2(ready_slot[i], dst_cycle + delay);
286*61046927SAndroid Build Coastguard Worker                   }
287*61046927SAndroid Build Coastguard Worker                } else {
288*61046927SAndroid Build Coastguard Worker                   *ready_slot =
289*61046927SAndroid Build Coastguard Worker                      reset_ready_slot ? 0 :
290*61046927SAndroid Build Coastguard Worker                      MAX2(*ready_slot, dst_cycle + delay);
291*61046927SAndroid Build Coastguard Worker                }
292*61046927SAndroid Build Coastguard Worker             }
293*61046927SAndroid Build Coastguard Worker          }
294*61046927SAndroid Build Coastguard Worker 
295*61046927SAndroid Build Coastguard Worker          /* Increment cycle for ALU instructions with (rptN) where destinations
296*61046927SAndroid Build Coastguard Worker           * are written each subsequent cycle.
297*61046927SAndroid Build Coastguard Worker           */
298*61046927SAndroid Build Coastguard Worker          if (instr->repeat && !(dst->flags & IR3_REG_RELATIV))
299*61046927SAndroid Build Coastguard Worker             dst_cycle++;
300*61046927SAndroid Build Coastguard Worker       }
301*61046927SAndroid Build Coastguard Worker    }
302*61046927SAndroid Build Coastguard Worker }
303*61046927SAndroid Build Coastguard Worker 
304*61046927SAndroid Build Coastguard Worker /* We want to evaluate each block from the position of any other
305*61046927SAndroid Build Coastguard Worker  * predecessor block, in order that the flags set are the union of
306*61046927SAndroid Build Coastguard Worker  * all possible program paths.
307*61046927SAndroid Build Coastguard Worker  *
308*61046927SAndroid Build Coastguard Worker  * To do this, we need to know the output state (needs_ss/ss_war/sy)
309*61046927SAndroid Build Coastguard Worker  * of all predecessor blocks.  The tricky thing is loops, which mean
310*61046927SAndroid Build Coastguard Worker  * that we can't simply recursively process each predecessor block
311*61046927SAndroid Build Coastguard Worker  * before legalizing the current block.
312*61046927SAndroid Build Coastguard Worker  *
313*61046927SAndroid Build Coastguard Worker  * How we handle that is by looping over all the blocks until the
314*61046927SAndroid Build Coastguard Worker  * results converge.  If the output state of a given block changes
315*61046927SAndroid Build Coastguard Worker  * in a given pass, this means that all successor blocks are not
316*61046927SAndroid Build Coastguard Worker  * yet fully legalized.
317*61046927SAndroid Build Coastguard Worker  */
318*61046927SAndroid Build Coastguard Worker 
319*61046927SAndroid Build Coastguard Worker static bool
legalize_block(struct ir3_legalize_ctx * ctx,struct ir3_block * block)320*61046927SAndroid Build Coastguard Worker legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
321*61046927SAndroid Build Coastguard Worker {
322*61046927SAndroid Build Coastguard Worker    struct ir3_legalize_block_data *bd = block->data;
323*61046927SAndroid Build Coastguard Worker 
324*61046927SAndroid Build Coastguard Worker    if (bd->valid)
325*61046927SAndroid Build Coastguard Worker       return false;
326*61046927SAndroid Build Coastguard Worker 
327*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *last_n = NULL;
328*61046927SAndroid Build Coastguard Worker    struct list_head instr_list;
329*61046927SAndroid Build Coastguard Worker    struct ir3_legalize_state prev_state = bd->state;
330*61046927SAndroid Build Coastguard Worker    struct ir3_legalize_state *state = &bd->begin_state;
331*61046927SAndroid Build Coastguard Worker    bool last_input_needs_ss = false;
332*61046927SAndroid Build Coastguard Worker    bool mergedregs = ctx->so->mergedregs;
333*61046927SAndroid Build Coastguard Worker 
334*61046927SAndroid Build Coastguard Worker    /* Our input state is the OR of all predecessor blocks' state.
335*61046927SAndroid Build Coastguard Worker     *
336*61046927SAndroid Build Coastguard Worker     * Why don't we just zero the state at the beginning before merging in the
337*61046927SAndroid Build Coastguard Worker     * predecessors? Because otherwise updates may not be a "lattice refinement",
338*61046927SAndroid Build Coastguard Worker     * i.e. needs_ss may go from true to false for some register due to a (ss) we
339*61046927SAndroid Build Coastguard Worker     * inserted the second time around (and the same for (sy)). This means that
340*61046927SAndroid Build Coastguard Worker     * there's no solid guarantee the algorithm will converge, and in theory
341*61046927SAndroid Build Coastguard Worker     * there may be infinite loops where we fight over the placement of an (ss).
342*61046927SAndroid Build Coastguard Worker     */
343*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < block->predecessors_count; i++) {
344*61046927SAndroid Build Coastguard Worker       struct ir3_block *predecessor = block->predecessors[i];
345*61046927SAndroid Build Coastguard Worker       struct ir3_legalize_block_data *pbd = predecessor->data;
346*61046927SAndroid Build Coastguard Worker       struct ir3_legalize_state *pstate = &pbd->state;
347*61046927SAndroid Build Coastguard Worker 
348*61046927SAndroid Build Coastguard Worker       /* Our input (ss)/(sy) state is based on OR'ing the output
349*61046927SAndroid Build Coastguard Worker        * state of all our predecessor blocks
350*61046927SAndroid Build Coastguard Worker        */
351*61046927SAndroid Build Coastguard Worker       regmask_or(&state->needs_ss, &state->needs_ss, &pstate->needs_ss);
352*61046927SAndroid Build Coastguard Worker       regmask_or(&state->needs_ss_war, &state->needs_ss_war,
353*61046927SAndroid Build Coastguard Worker                  &pstate->needs_ss_war);
354*61046927SAndroid Build Coastguard Worker       regmask_or(&state->needs_ss_or_sy_war, &state->needs_ss_or_sy_war,
355*61046927SAndroid Build Coastguard Worker                  &pstate->needs_ss_or_sy_war);
356*61046927SAndroid Build Coastguard Worker       regmask_or(&state->needs_sy, &state->needs_sy, &pstate->needs_sy);
357*61046927SAndroid Build Coastguard Worker       state->needs_ss_for_const |= pstate->needs_ss_for_const;
358*61046927SAndroid Build Coastguard Worker 
359*61046927SAndroid Build Coastguard Worker       /* Our nop state is the max of the predecessor blocks */
360*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++)
361*61046927SAndroid Build Coastguard Worker          state->pred_ready[i] = MAX2(state->pred_ready[i],
362*61046927SAndroid Build Coastguard Worker                                      pstate->pred_ready[i]);
363*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) {
364*61046927SAndroid Build Coastguard Worker          state->alu_nop.full_ready[i] = MAX2(state->alu_nop.full_ready[i],
365*61046927SAndroid Build Coastguard Worker                                              pstate->alu_nop.full_ready[i]);
366*61046927SAndroid Build Coastguard Worker          state->alu_nop.half_ready[i] = MAX2(state->alu_nop.half_ready[i],
367*61046927SAndroid Build Coastguard Worker                                              pstate->alu_nop.half_ready[i]);
368*61046927SAndroid Build Coastguard Worker          state->non_alu_nop.full_ready[i] = MAX2(state->non_alu_nop.full_ready[i],
369*61046927SAndroid Build Coastguard Worker                                                  pstate->non_alu_nop.full_ready[i]);
370*61046927SAndroid Build Coastguard Worker          state->non_alu_nop.half_ready[i] = MAX2(state->non_alu_nop.half_ready[i],
371*61046927SAndroid Build Coastguard Worker                                                  pstate->non_alu_nop.half_ready[i]);
372*61046927SAndroid Build Coastguard Worker       }
373*61046927SAndroid Build Coastguard Worker    }
374*61046927SAndroid Build Coastguard Worker 
375*61046927SAndroid Build Coastguard Worker    /* We need to take physical-only edges into account when tracking shared
376*61046927SAndroid Build Coastguard Worker     * registers.
377*61046927SAndroid Build Coastguard Worker     */
378*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
379*61046927SAndroid Build Coastguard Worker       struct ir3_block *predecessor = block->physical_predecessors[i];
380*61046927SAndroid Build Coastguard Worker       struct ir3_legalize_block_data *pbd = predecessor->data;
381*61046927SAndroid Build Coastguard Worker       struct ir3_legalize_state *pstate = &pbd->state;
382*61046927SAndroid Build Coastguard Worker 
383*61046927SAndroid Build Coastguard Worker       regmask_or_shared(&state->needs_ss, &state->needs_ss, &pstate->needs_ss);
384*61046927SAndroid Build Coastguard Worker       regmask_or_shared(&state->needs_ss_scalar_full,
385*61046927SAndroid Build Coastguard Worker                         &state->needs_ss_scalar_full,
386*61046927SAndroid Build Coastguard Worker                         &pstate->needs_ss_scalar_full);
387*61046927SAndroid Build Coastguard Worker       regmask_or_shared(&state->needs_ss_scalar_half,
388*61046927SAndroid Build Coastguard Worker                         &state->needs_ss_scalar_half,
389*61046927SAndroid Build Coastguard Worker                         &pstate->needs_ss_scalar_half);
390*61046927SAndroid Build Coastguard Worker       regmask_or_shared(&state->needs_ss_scalar_war, &state->needs_ss_scalar_war,
391*61046927SAndroid Build Coastguard Worker                         &pstate->needs_ss_scalar_war);
392*61046927SAndroid Build Coastguard Worker       regmask_or_shared(&state->needs_ss_or_sy_scalar_war,
393*61046927SAndroid Build Coastguard Worker                         &state->needs_ss_or_sy_scalar_war,
394*61046927SAndroid Build Coastguard Worker                         &pstate->needs_ss_or_sy_scalar_war);
395*61046927SAndroid Build Coastguard Worker    }
396*61046927SAndroid Build Coastguard Worker 
397*61046927SAndroid Build Coastguard Worker    memcpy(&bd->state, state, sizeof(*state));
398*61046927SAndroid Build Coastguard Worker    state = &bd->state;
399*61046927SAndroid Build Coastguard Worker 
400*61046927SAndroid Build Coastguard Worker    unsigned input_count = 0;
401*61046927SAndroid Build Coastguard Worker 
402*61046927SAndroid Build Coastguard Worker    foreach_instr (n, &block->instr_list) {
403*61046927SAndroid Build Coastguard Worker       if (is_input(n)) {
404*61046927SAndroid Build Coastguard Worker          input_count++;
405*61046927SAndroid Build Coastguard Worker       }
406*61046927SAndroid Build Coastguard Worker    }
407*61046927SAndroid Build Coastguard Worker 
408*61046927SAndroid Build Coastguard Worker    unsigned inputs_remaining = input_count;
409*61046927SAndroid Build Coastguard Worker 
410*61046927SAndroid Build Coastguard Worker    /* Either inputs are in the first block or we expect inputs to be released
411*61046927SAndroid Build Coastguard Worker     * with the end of the program.
412*61046927SAndroid Build Coastguard Worker     */
413*61046927SAndroid Build Coastguard Worker    assert(input_count == 0 || !ctx->early_input_release ||
414*61046927SAndroid Build Coastguard Worker           block == ir3_after_preamble(block->shader));
415*61046927SAndroid Build Coastguard Worker 
416*61046927SAndroid Build Coastguard Worker    /* remove all the instructions from the list, we'll be adding
417*61046927SAndroid Build Coastguard Worker     * them back in as we go
418*61046927SAndroid Build Coastguard Worker     */
419*61046927SAndroid Build Coastguard Worker    list_replace(&block->instr_list, &instr_list);
420*61046927SAndroid Build Coastguard Worker    list_inithead(&block->instr_list);
421*61046927SAndroid Build Coastguard Worker 
422*61046927SAndroid Build Coastguard Worker    unsigned cycle = 0;
423*61046927SAndroid Build Coastguard Worker 
424*61046927SAndroid Build Coastguard Worker    foreach_instr_safe (n, &instr_list) {
425*61046927SAndroid Build Coastguard Worker       unsigned i;
426*61046927SAndroid Build Coastguard Worker 
427*61046927SAndroid Build Coastguard Worker       n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY);
428*61046927SAndroid Build Coastguard Worker 
429*61046927SAndroid Build Coastguard Worker       /* _meta::tex_prefetch instructions removed later in
430*61046927SAndroid Build Coastguard Worker        * collect_tex_prefetches()
431*61046927SAndroid Build Coastguard Worker        */
432*61046927SAndroid Build Coastguard Worker       if (is_meta(n) && (n->opc != OPC_META_TEX_PREFETCH))
433*61046927SAndroid Build Coastguard Worker          continue;
434*61046927SAndroid Build Coastguard Worker 
435*61046927SAndroid Build Coastguard Worker       if (is_input(n)) {
436*61046927SAndroid Build Coastguard Worker          struct ir3_register *inloc = n->srcs[0];
437*61046927SAndroid Build Coastguard Worker          assert(inloc->flags & IR3_REG_IMMED);
438*61046927SAndroid Build Coastguard Worker 
439*61046927SAndroid Build Coastguard Worker          int last_inloc =
440*61046927SAndroid Build Coastguard Worker             inloc->iim_val + ((inloc->flags & IR3_REG_R) ? n->repeat : 0);
441*61046927SAndroid Build Coastguard Worker          ctx->max_bary = MAX2(ctx->max_bary, last_inloc);
442*61046927SAndroid Build Coastguard Worker       }
443*61046927SAndroid Build Coastguard Worker 
444*61046927SAndroid Build Coastguard Worker       if ((last_n && is_barrier(last_n)) || n->opc == OPC_SHPE) {
445*61046927SAndroid Build Coastguard Worker          apply_ss(n, state, mergedregs);
446*61046927SAndroid Build Coastguard Worker          apply_sy(n, state, mergedregs);
447*61046927SAndroid Build Coastguard Worker          last_input_needs_ss = false;
448*61046927SAndroid Build Coastguard Worker       }
449*61046927SAndroid Build Coastguard Worker 
450*61046927SAndroid Build Coastguard Worker       if (last_n && (last_n->opc == OPC_PREDT)) {
451*61046927SAndroid Build Coastguard Worker          apply_ss(n, state, mergedregs);
452*61046927SAndroid Build Coastguard Worker       }
453*61046927SAndroid Build Coastguard Worker 
454*61046927SAndroid Build Coastguard Worker       bool n_is_scalar_alu = is_scalar_alu(n, ctx->compiler);
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker       /* NOTE: consider dst register too.. it could happen that
457*61046927SAndroid Build Coastguard Worker        * texture sample instruction (for example) writes some
458*61046927SAndroid Build Coastguard Worker        * components which are unused.  A subsequent instruction
459*61046927SAndroid Build Coastguard Worker        * that writes the same register can race w/ the sam instr
460*61046927SAndroid Build Coastguard Worker        * resulting in undefined results:
461*61046927SAndroid Build Coastguard Worker        */
462*61046927SAndroid Build Coastguard Worker       for (i = 0; i < n->dsts_count + n->srcs_count; i++) {
463*61046927SAndroid Build Coastguard Worker          struct ir3_register *reg;
464*61046927SAndroid Build Coastguard Worker          if (i < n->dsts_count)
465*61046927SAndroid Build Coastguard Worker             reg = n->dsts[i];
466*61046927SAndroid Build Coastguard Worker          else
467*61046927SAndroid Build Coastguard Worker             reg = n->srcs[i - n->dsts_count];
468*61046927SAndroid Build Coastguard Worker 
469*61046927SAndroid Build Coastguard Worker          if (reg_gpr(reg)) {
470*61046927SAndroid Build Coastguard Worker 
471*61046927SAndroid Build Coastguard Worker             /* TODO: we probably only need (ss) for alu
472*61046927SAndroid Build Coastguard Worker              * instr consuming sfu result.. need to make
473*61046927SAndroid Build Coastguard Worker              * some tests for both this and (sy)..
474*61046927SAndroid Build Coastguard Worker              */
475*61046927SAndroid Build Coastguard Worker             if (regmask_get(&state->needs_ss, reg)) {
476*61046927SAndroid Build Coastguard Worker                apply_ss(n, state, mergedregs);
477*61046927SAndroid Build Coastguard Worker                last_input_needs_ss = false;
478*61046927SAndroid Build Coastguard Worker             }
479*61046927SAndroid Build Coastguard Worker 
480*61046927SAndroid Build Coastguard Worker             /* There is a fast feedback path for scalar ALU instructions which
481*61046927SAndroid Build Coastguard Worker              * only takes 1 cycle of latency, similar to the normal 3 cycle
482*61046927SAndroid Build Coastguard Worker              * latency path for ALU instructions. For this fast path the
483*61046927SAndroid Build Coastguard Worker              * producer and consumer must use the same register size (i.e. no
484*61046927SAndroid Build Coastguard Worker              * writing a full register and then reading half of it or vice
485*61046927SAndroid Build Coastguard Worker              * versa). If we don't hit this path, either because of a mismatched
486*61046927SAndroid Build Coastguard Worker              * size or a read via the regular ALU, then the write latency is
487*61046927SAndroid Build Coastguard Worker              * variable and we must use (ss) to wait for the scalar ALU. This is
488*61046927SAndroid Build Coastguard Worker              * different from the fixed 6 cycle latency for mismatched vector
489*61046927SAndroid Build Coastguard Worker              * ALU accesses.
490*61046927SAndroid Build Coastguard Worker              */
491*61046927SAndroid Build Coastguard Worker             if (n_is_scalar_alu) {
492*61046927SAndroid Build Coastguard Worker                /* Check if we have a mismatched size RaW dependency */
493*61046927SAndroid Build Coastguard Worker                if (regmask_get((reg->flags & IR3_REG_HALF) ?
494*61046927SAndroid Build Coastguard Worker                                &state->needs_ss_scalar_half :
495*61046927SAndroid Build Coastguard Worker                                &state->needs_ss_scalar_full, reg)) {
496*61046927SAndroid Build Coastguard Worker                   apply_ss(n, state, mergedregs);
497*61046927SAndroid Build Coastguard Worker                   last_input_needs_ss = false;
498*61046927SAndroid Build Coastguard Worker                }
499*61046927SAndroid Build Coastguard Worker             } else {
500*61046927SAndroid Build Coastguard Worker                /* check if we have a scalar -> vector RaW dependency */
501*61046927SAndroid Build Coastguard Worker                if (regmask_get(&state->needs_ss_scalar_half, reg) ||
502*61046927SAndroid Build Coastguard Worker                    regmask_get(&state->needs_ss_scalar_full, reg)) {
503*61046927SAndroid Build Coastguard Worker                   apply_ss(n, state, mergedregs);
504*61046927SAndroid Build Coastguard Worker                   last_input_needs_ss = false;
505*61046927SAndroid Build Coastguard Worker                }
506*61046927SAndroid Build Coastguard Worker             }
507*61046927SAndroid Build Coastguard Worker 
508*61046927SAndroid Build Coastguard Worker             if (regmask_get(&state->needs_sy, reg)) {
509*61046927SAndroid Build Coastguard Worker                apply_sy(n, state, mergedregs);
510*61046927SAndroid Build Coastguard Worker             }
511*61046927SAndroid Build Coastguard Worker          } else if ((reg->flags & IR3_REG_CONST)) {
512*61046927SAndroid Build Coastguard Worker             if (state->needs_ss_for_const) {
513*61046927SAndroid Build Coastguard Worker                apply_ss(n, state, mergedregs);
514*61046927SAndroid Build Coastguard Worker                last_input_needs_ss = false;
515*61046927SAndroid Build Coastguard Worker             }
516*61046927SAndroid Build Coastguard Worker          } else if (reg_is_addr1(reg) && block->in_early_preamble) {
517*61046927SAndroid Build Coastguard Worker             if (regmask_get(&state->needs_ss, reg)) {
518*61046927SAndroid Build Coastguard Worker                apply_ss(n, state, mergedregs);
519*61046927SAndroid Build Coastguard Worker                last_input_needs_ss = false;
520*61046927SAndroid Build Coastguard Worker             }
521*61046927SAndroid Build Coastguard Worker          }
522*61046927SAndroid Build Coastguard Worker       }
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker       foreach_dst (reg, n) {
525*61046927SAndroid Build Coastguard Worker          if (needs_ss_war(state, reg, n_is_scalar_alu)) {
526*61046927SAndroid Build Coastguard Worker             apply_ss(n, state, mergedregs);
527*61046927SAndroid Build Coastguard Worker             last_input_needs_ss = false;
528*61046927SAndroid Build Coastguard Worker          }
529*61046927SAndroid Build Coastguard Worker       }
530*61046927SAndroid Build Coastguard Worker 
531*61046927SAndroid Build Coastguard Worker       /* I'm not exactly sure what this is for, but it seems we need this on
532*61046927SAndroid Build Coastguard Worker        * every mova1 in early preambles.
533*61046927SAndroid Build Coastguard Worker        */
534*61046927SAndroid Build Coastguard Worker       if (writes_addr1(n) && block->in_early_preamble)
535*61046927SAndroid Build Coastguard Worker          n->srcs[0]->flags |= IR3_REG_R;
536*61046927SAndroid Build Coastguard Worker 
537*61046927SAndroid Build Coastguard Worker       /* cat5+ does not have an (ss) bit, if needed we need to
538*61046927SAndroid Build Coastguard Worker        * insert a nop to carry the sync flag.  Would be kinda
539*61046927SAndroid Build Coastguard Worker        * clever if we were aware of this during scheduling, but
540*61046927SAndroid Build Coastguard Worker        * this should be a pretty rare case:
541*61046927SAndroid Build Coastguard Worker        */
542*61046927SAndroid Build Coastguard Worker       if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) {
543*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *nop;
544*61046927SAndroid Build Coastguard Worker          nop = ir3_NOP(block);
545*61046927SAndroid Build Coastguard Worker          nop->flags |= IR3_INSTR_SS;
546*61046927SAndroid Build Coastguard Worker          n->flags &= ~IR3_INSTR_SS;
547*61046927SAndroid Build Coastguard Worker          last_n = nop;
548*61046927SAndroid Build Coastguard Worker          cycle++;
549*61046927SAndroid Build Coastguard Worker       }
550*61046927SAndroid Build Coastguard Worker 
551*61046927SAndroid Build Coastguard Worker       unsigned delay = delay_calc(state, n, cycle);
552*61046927SAndroid Build Coastguard Worker 
553*61046927SAndroid Build Coastguard Worker       /* NOTE: I think the nopN encoding works for a5xx and
554*61046927SAndroid Build Coastguard Worker        * probably a4xx, but not a3xx.  So far only tested on
555*61046927SAndroid Build Coastguard Worker        * a6xx.
556*61046927SAndroid Build Coastguard Worker        */
557*61046927SAndroid Build Coastguard Worker 
558*61046927SAndroid Build Coastguard Worker       if ((delay > 0) && (ctx->compiler->gen >= 6) && last_n &&
559*61046927SAndroid Build Coastguard Worker           !n_is_scalar_alu &&
560*61046927SAndroid Build Coastguard Worker           ((opc_cat(last_n->opc) == 2) || (opc_cat(last_n->opc) == 3)) &&
561*61046927SAndroid Build Coastguard Worker           (last_n->repeat == 0)) {
562*61046927SAndroid Build Coastguard Worker          /* the previous cat2/cat3 instruction can encode at most 3 nop's: */
563*61046927SAndroid Build Coastguard Worker          unsigned transfer = MIN2(delay, 3 - last_n->nop);
564*61046927SAndroid Build Coastguard Worker          last_n->nop += transfer;
565*61046927SAndroid Build Coastguard Worker          delay -= transfer;
566*61046927SAndroid Build Coastguard Worker          cycle += transfer;
567*61046927SAndroid Build Coastguard Worker       }
568*61046927SAndroid Build Coastguard Worker 
569*61046927SAndroid Build Coastguard Worker       if ((delay > 0) && last_n && (last_n->opc == OPC_NOP)) {
570*61046927SAndroid Build Coastguard Worker          /* the previous nop can encode at most 5 repeats: */
571*61046927SAndroid Build Coastguard Worker          unsigned transfer = MIN2(delay, 5 - last_n->repeat);
572*61046927SAndroid Build Coastguard Worker          last_n->repeat += transfer;
573*61046927SAndroid Build Coastguard Worker          delay -= transfer;
574*61046927SAndroid Build Coastguard Worker          cycle += transfer;
575*61046927SAndroid Build Coastguard Worker       }
576*61046927SAndroid Build Coastguard Worker 
577*61046927SAndroid Build Coastguard Worker       if (delay > 0) {
578*61046927SAndroid Build Coastguard Worker          assert(delay <= 6);
579*61046927SAndroid Build Coastguard Worker          ir3_NOP(block)->repeat = delay - 1;
580*61046927SAndroid Build Coastguard Worker          cycle += delay;
581*61046927SAndroid Build Coastguard Worker       }
582*61046927SAndroid Build Coastguard Worker 
583*61046927SAndroid Build Coastguard Worker       if (ctx->compiler->samgq_workaround &&
584*61046927SAndroid Build Coastguard Worker           ctx->type != MESA_SHADER_FRAGMENT &&
585*61046927SAndroid Build Coastguard Worker           ctx->type != MESA_SHADER_COMPUTE && n->opc == OPC_SAMGQ) {
586*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *samgp;
587*61046927SAndroid Build Coastguard Worker 
588*61046927SAndroid Build Coastguard Worker          list_delinit(&n->node);
589*61046927SAndroid Build Coastguard Worker 
590*61046927SAndroid Build Coastguard Worker          for (i = 0; i < 4; i++) {
591*61046927SAndroid Build Coastguard Worker             samgp = ir3_instr_clone(n);
592*61046927SAndroid Build Coastguard Worker             samgp->opc = OPC_SAMGP0 + i;
593*61046927SAndroid Build Coastguard Worker             if (i > 1)
594*61046927SAndroid Build Coastguard Worker                samgp->flags |= IR3_INSTR_SY;
595*61046927SAndroid Build Coastguard Worker          }
596*61046927SAndroid Build Coastguard Worker       } else {
597*61046927SAndroid Build Coastguard Worker          list_delinit(&n->node);
598*61046927SAndroid Build Coastguard Worker          list_addtail(&n->node, &block->instr_list);
599*61046927SAndroid Build Coastguard Worker       }
600*61046927SAndroid Build Coastguard Worker 
601*61046927SAndroid Build Coastguard Worker       if (is_sfu(n))
602*61046927SAndroid Build Coastguard Worker          regmask_set(&state->needs_ss, n->dsts[0]);
603*61046927SAndroid Build Coastguard Worker 
604*61046927SAndroid Build Coastguard Worker       foreach_dst (dst, n) {
605*61046927SAndroid Build Coastguard Worker          if (dst->flags & IR3_REG_SHARED) {
606*61046927SAndroid Build Coastguard Worker             if (n_is_scalar_alu) {
607*61046927SAndroid Build Coastguard Worker                if (dst->flags & IR3_REG_HALF)
608*61046927SAndroid Build Coastguard Worker                   regmask_set(&state->needs_ss_scalar_full, dst);
609*61046927SAndroid Build Coastguard Worker                else
610*61046927SAndroid Build Coastguard Worker                   regmask_set(&state->needs_ss_scalar_half, dst);
611*61046927SAndroid Build Coastguard Worker             } else {
612*61046927SAndroid Build Coastguard Worker                regmask_set(&state->needs_ss, dst);
613*61046927SAndroid Build Coastguard Worker             }
614*61046927SAndroid Build Coastguard Worker          } else if (reg_is_addr1(dst) && block->in_early_preamble) {
615*61046927SAndroid Build Coastguard Worker             regmask_set(&state->needs_ss, dst);
616*61046927SAndroid Build Coastguard Worker          }
617*61046927SAndroid Build Coastguard Worker       }
618*61046927SAndroid Build Coastguard Worker 
619*61046927SAndroid Build Coastguard Worker       if (is_tex_or_prefetch(n) && n->dsts_count > 0) {
620*61046927SAndroid Build Coastguard Worker          regmask_set(&state->needs_sy, n->dsts[0]);
621*61046927SAndroid Build Coastguard Worker          if (n->opc == OPC_META_TEX_PREFETCH)
622*61046927SAndroid Build Coastguard Worker             ctx->has_tex_prefetch = true;
623*61046927SAndroid Build Coastguard Worker       } else if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
624*61046927SAndroid Build Coastguard Worker          regmask_set(&state->needs_ss, n->dsts[0]);
625*61046927SAndroid Build Coastguard Worker          ir3_NOP(block)->flags |= IR3_INSTR_SS;
626*61046927SAndroid Build Coastguard Worker          last_input_needs_ss = false;
627*61046927SAndroid Build Coastguard Worker       } else if (is_load(n)) {
628*61046927SAndroid Build Coastguard Worker          if (is_local_mem_load(n))
629*61046927SAndroid Build Coastguard Worker             regmask_set(&state->needs_ss, n->dsts[0]);
630*61046927SAndroid Build Coastguard Worker          else
631*61046927SAndroid Build Coastguard Worker             regmask_set(&state->needs_sy, n->dsts[0]);
632*61046927SAndroid Build Coastguard Worker       } else if (is_atomic(n->opc)) {
633*61046927SAndroid Build Coastguard Worker          if (is_bindless_atomic(n->opc)) {
634*61046927SAndroid Build Coastguard Worker             regmask_set(&state->needs_sy, n->srcs[2]);
635*61046927SAndroid Build Coastguard Worker          } else if (is_global_a3xx_atomic(n->opc) ||
636*61046927SAndroid Build Coastguard Worker                     is_global_a6xx_atomic(n->opc)) {
637*61046927SAndroid Build Coastguard Worker             regmask_set(&state->needs_sy, n->dsts[0]);
638*61046927SAndroid Build Coastguard Worker          } else {
639*61046927SAndroid Build Coastguard Worker             regmask_set(&state->needs_ss, n->dsts[0]);
640*61046927SAndroid Build Coastguard Worker          }
641*61046927SAndroid Build Coastguard Worker       } else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
642*61046927SAndroid Build Coastguard Worker          state->needs_ss_for_const = true;
643*61046927SAndroid Build Coastguard Worker       }
644*61046927SAndroid Build Coastguard Worker 
645*61046927SAndroid Build Coastguard Worker       if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
646*61046927SAndroid Build Coastguard Worker           is_bindless_atomic(n->opc))
647*61046927SAndroid Build Coastguard Worker          ctx->so->has_ssbo = true;
648*61046927SAndroid Build Coastguard Worker 
649*61046927SAndroid Build Coastguard Worker       /* both tex/sfu appear to not always immediately consume
650*61046927SAndroid Build Coastguard Worker        * their src register(s):
651*61046927SAndroid Build Coastguard Worker        */
652*61046927SAndroid Build Coastguard Worker       if (is_war_hazard_producer(n)) {
653*61046927SAndroid Build Coastguard Worker          /* These WAR hazards can always be resolved with (ss). However, when
654*61046927SAndroid Build Coastguard Worker           * the reader is a sy-producer, they can also be resolved using (sy)
655*61046927SAndroid Build Coastguard Worker           * because once we have synced the reader's results using (sy), its
656*61046927SAndroid Build Coastguard Worker           * sources have definitely been consumed. We track the two cases
657*61046927SAndroid Build Coastguard Worker           * separately so that we don't add an unnecessary (ss) if a (sy) sync
658*61046927SAndroid Build Coastguard Worker           * already happened.
659*61046927SAndroid Build Coastguard Worker           * For example, this prevents adding the unnecessary (ss) in the
660*61046927SAndroid Build Coastguard Worker           * following sequence:
661*61046927SAndroid Build Coastguard Worker           * sam rd, rs, ...
662*61046927SAndroid Build Coastguard Worker           * (sy)... ; sam synced so consumed its sources
663*61046927SAndroid Build Coastguard Worker           * (ss)write rs ; (ss) unnecessary since rs has been consumed already
664*61046927SAndroid Build Coastguard Worker           */
665*61046927SAndroid Build Coastguard Worker          bool needs_ss = is_ss_producer(n) || is_store(n) || n->opc == OPC_STC;
666*61046927SAndroid Build Coastguard Worker 
667*61046927SAndroid Build Coastguard Worker          if (n_is_scalar_alu) {
668*61046927SAndroid Build Coastguard Worker             /* Scalar ALU also does not immediately read its source because it
669*61046927SAndroid Build Coastguard Worker              * is not executed right away, but scalar ALU instructions are
670*61046927SAndroid Build Coastguard Worker              * executed in-order so subsequent scalar ALU instructions don't
671*61046927SAndroid Build Coastguard Worker              * need to wait for previous ones.
672*61046927SAndroid Build Coastguard Worker              */
673*61046927SAndroid Build Coastguard Worker             regmask_t *mask = needs_ss ? &state->needs_ss_scalar_war
674*61046927SAndroid Build Coastguard Worker                                        : &state->needs_ss_or_sy_scalar_war;
675*61046927SAndroid Build Coastguard Worker 
676*61046927SAndroid Build Coastguard Worker             foreach_src (reg, n) {
677*61046927SAndroid Build Coastguard Worker                if ((reg->flags & IR3_REG_SHARED) || is_reg_a0(reg)) {
678*61046927SAndroid Build Coastguard Worker                   regmask_set(mask, reg);
679*61046927SAndroid Build Coastguard Worker                }
680*61046927SAndroid Build Coastguard Worker             }
681*61046927SAndroid Build Coastguard Worker          } else {
682*61046927SAndroid Build Coastguard Worker             regmask_t *mask =
683*61046927SAndroid Build Coastguard Worker                needs_ss ? &state->needs_ss_war : &state->needs_ss_or_sy_war;
684*61046927SAndroid Build Coastguard Worker 
685*61046927SAndroid Build Coastguard Worker             foreach_src (reg, n) {
686*61046927SAndroid Build Coastguard Worker                if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
687*61046927SAndroid Build Coastguard Worker                   regmask_set(mask, reg);
688*61046927SAndroid Build Coastguard Worker                }
689*61046927SAndroid Build Coastguard Worker             }
690*61046927SAndroid Build Coastguard Worker          }
691*61046927SAndroid Build Coastguard Worker       }
692*61046927SAndroid Build Coastguard Worker 
693*61046927SAndroid Build Coastguard Worker       bool count = count_instruction(n, ctx->compiler);
694*61046927SAndroid Build Coastguard Worker       if (count)
695*61046927SAndroid Build Coastguard Worker          cycle += 1;
696*61046927SAndroid Build Coastguard Worker 
697*61046927SAndroid Build Coastguard Worker       delay_update(state, n, cycle, mergedregs);
698*61046927SAndroid Build Coastguard Worker 
699*61046927SAndroid Build Coastguard Worker       if (count)
700*61046927SAndroid Build Coastguard Worker          cycle += n->repeat;
701*61046927SAndroid Build Coastguard Worker 
702*61046927SAndroid Build Coastguard Worker       if (ctx->early_input_release && is_input(n)) {
703*61046927SAndroid Build Coastguard Worker          last_input_needs_ss |= (n->opc == OPC_LDLV);
704*61046927SAndroid Build Coastguard Worker 
705*61046927SAndroid Build Coastguard Worker          assert(inputs_remaining > 0);
706*61046927SAndroid Build Coastguard Worker          inputs_remaining--;
707*61046927SAndroid Build Coastguard Worker          if (inputs_remaining == 0) {
708*61046927SAndroid Build Coastguard Worker             /* This is the last input. We add the (ei) flag to release
709*61046927SAndroid Build Coastguard Worker              * varying memory after this executes. If it's an ldlv,
710*61046927SAndroid Build Coastguard Worker              * however, we need to insert a dummy bary.f on which we can
711*61046927SAndroid Build Coastguard Worker              * set the (ei) flag. We may also need to insert an (ss) to
712*61046927SAndroid Build Coastguard Worker              * guarantee that all ldlv's have finished fetching their
713*61046927SAndroid Build Coastguard Worker              * results before releasing the varying memory.
714*61046927SAndroid Build Coastguard Worker              */
715*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *last_input = n;
716*61046927SAndroid Build Coastguard Worker             if (n->opc == OPC_LDLV) {
717*61046927SAndroid Build Coastguard Worker                struct ir3_instruction *baryf;
718*61046927SAndroid Build Coastguard Worker 
719*61046927SAndroid Build Coastguard Worker                /* (ss)bary.f (ei)r63.x, 0, r0.x */
720*61046927SAndroid Build Coastguard Worker                baryf = ir3_instr_create(block, OPC_BARY_F, 1, 2);
721*61046927SAndroid Build Coastguard Worker                ir3_dst_create(baryf, regid(63, 0), 0);
722*61046927SAndroid Build Coastguard Worker                ir3_src_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
723*61046927SAndroid Build Coastguard Worker                ir3_src_create(baryf, regid(0, 0), 0);
724*61046927SAndroid Build Coastguard Worker 
725*61046927SAndroid Build Coastguard Worker                last_input = baryf;
726*61046927SAndroid Build Coastguard Worker             }
727*61046927SAndroid Build Coastguard Worker 
728*61046927SAndroid Build Coastguard Worker             last_input->dsts[0]->flags |= IR3_REG_EI;
729*61046927SAndroid Build Coastguard Worker             if (last_input_needs_ss) {
730*61046927SAndroid Build Coastguard Worker                apply_ss(last_input, state, mergedregs);
731*61046927SAndroid Build Coastguard Worker             }
732*61046927SAndroid Build Coastguard Worker          }
733*61046927SAndroid Build Coastguard Worker       }
734*61046927SAndroid Build Coastguard Worker 
735*61046927SAndroid Build Coastguard Worker       last_n = n;
736*61046927SAndroid Build Coastguard Worker    }
737*61046927SAndroid Build Coastguard Worker 
738*61046927SAndroid Build Coastguard Worker    assert(inputs_remaining == 0 || !ctx->early_input_release);
739*61046927SAndroid Build Coastguard Worker 
740*61046927SAndroid Build Coastguard Worker    if (block == ir3_after_preamble(ctx->so->ir) &&
741*61046927SAndroid Build Coastguard Worker        ctx->has_tex_prefetch && !ctx->has_inputs) {
742*61046927SAndroid Build Coastguard Worker       /* texture prefetch, but *no* inputs.. we need to insert a
743*61046927SAndroid Build Coastguard Worker        * dummy bary.f at the top of the shader to unblock varying
744*61046927SAndroid Build Coastguard Worker        * storage:
745*61046927SAndroid Build Coastguard Worker        */
746*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *baryf;
747*61046927SAndroid Build Coastguard Worker 
748*61046927SAndroid Build Coastguard Worker       /* (ss)bary.f (ei)r63.x, 0, r0.x */
749*61046927SAndroid Build Coastguard Worker       baryf = ir3_instr_create(block, OPC_BARY_F, 1, 2);
750*61046927SAndroid Build Coastguard Worker       ir3_dst_create(baryf, regid(63, 0), 0)->flags |= IR3_REG_EI;
751*61046927SAndroid Build Coastguard Worker       ir3_src_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
752*61046927SAndroid Build Coastguard Worker       ir3_src_create(baryf, regid(0, 0), 0);
753*61046927SAndroid Build Coastguard Worker 
754*61046927SAndroid Build Coastguard Worker       /* insert the dummy bary.f at head: */
755*61046927SAndroid Build Coastguard Worker       list_delinit(&baryf->node);
756*61046927SAndroid Build Coastguard Worker       list_add(&baryf->node, &block->instr_list);
757*61046927SAndroid Build Coastguard Worker    }
758*61046927SAndroid Build Coastguard Worker 
759*61046927SAndroid Build Coastguard Worker    /* Currently our nop state contains the cycle offset from the start of this
760*61046927SAndroid Build Coastguard Worker     * block when each register becomes ready. But successor blocks need the
761*61046927SAndroid Build Coastguard Worker     * cycle offset from their start, which is this block's end. Translate the
762*61046927SAndroid Build Coastguard Worker     * cycle offset.
763*61046927SAndroid Build Coastguard Worker     */
764*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++)
765*61046927SAndroid Build Coastguard Worker       state->pred_ready[i] = MAX2(state->pred_ready[i], cycle) - cycle;
766*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) {
767*61046927SAndroid Build Coastguard Worker       state->alu_nop.full_ready[i] =
768*61046927SAndroid Build Coastguard Worker          MAX2(state->alu_nop.full_ready[i], cycle) - cycle;
769*61046927SAndroid Build Coastguard Worker       state->alu_nop.half_ready[i] =
770*61046927SAndroid Build Coastguard Worker          MAX2(state->alu_nop.half_ready[i], cycle) - cycle;
771*61046927SAndroid Build Coastguard Worker       state->non_alu_nop.full_ready[i] =
772*61046927SAndroid Build Coastguard Worker          MAX2(state->non_alu_nop.full_ready[i], cycle) - cycle;
773*61046927SAndroid Build Coastguard Worker       state->non_alu_nop.half_ready[i] =
774*61046927SAndroid Build Coastguard Worker          MAX2(state->non_alu_nop.half_ready[i], cycle) - cycle;
775*61046927SAndroid Build Coastguard Worker    }
776*61046927SAndroid Build Coastguard Worker 
777*61046927SAndroid Build Coastguard Worker    bd->valid = true;
778*61046927SAndroid Build Coastguard Worker 
779*61046927SAndroid Build Coastguard Worker    if (memcmp(&prev_state, state, sizeof(*state))) {
780*61046927SAndroid Build Coastguard Worker       /* our output state changed, this invalidates all of our
781*61046927SAndroid Build Coastguard Worker        * successors:
782*61046927SAndroid Build Coastguard Worker        */
783*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
784*61046927SAndroid Build Coastguard Worker          if (!block->successors[i])
785*61046927SAndroid Build Coastguard Worker             break;
786*61046927SAndroid Build Coastguard Worker          struct ir3_legalize_block_data *pbd = block->successors[i]->data;
787*61046927SAndroid Build Coastguard Worker          pbd->valid = false;
788*61046927SAndroid Build Coastguard Worker       }
789*61046927SAndroid Build Coastguard Worker    }
790*61046927SAndroid Build Coastguard Worker 
791*61046927SAndroid Build Coastguard Worker    return true;
792*61046927SAndroid Build Coastguard Worker }
793*61046927SAndroid Build Coastguard Worker 
794*61046927SAndroid Build Coastguard Worker /* Expands dsxpp and dsypp macros to:
795*61046927SAndroid Build Coastguard Worker  *
796*61046927SAndroid Build Coastguard Worker  * dsxpp.1 dst, src
797*61046927SAndroid Build Coastguard Worker  * dsxpp.1.p dst, src
798*61046927SAndroid Build Coastguard Worker  *
799*61046927SAndroid Build Coastguard Worker  * We apply this after flags syncing, as we don't want to sync in between the
800*61046927SAndroid Build Coastguard Worker  * two (which might happen if dst == src).
801*61046927SAndroid Build Coastguard Worker  */
802*61046927SAndroid Build Coastguard Worker static bool
apply_fine_deriv_macro(struct ir3_legalize_ctx * ctx,struct ir3_block * block)803*61046927SAndroid Build Coastguard Worker apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
804*61046927SAndroid Build Coastguard Worker {
805*61046927SAndroid Build Coastguard Worker    struct list_head instr_list;
806*61046927SAndroid Build Coastguard Worker 
807*61046927SAndroid Build Coastguard Worker    /* remove all the instructions from the list, we'll be adding
808*61046927SAndroid Build Coastguard Worker     * them back in as we go
809*61046927SAndroid Build Coastguard Worker     */
810*61046927SAndroid Build Coastguard Worker    list_replace(&block->instr_list, &instr_list);
811*61046927SAndroid Build Coastguard Worker    list_inithead(&block->instr_list);
812*61046927SAndroid Build Coastguard Worker 
813*61046927SAndroid Build Coastguard Worker    foreach_instr_safe (n, &instr_list) {
814*61046927SAndroid Build Coastguard Worker       list_addtail(&n->node, &block->instr_list);
815*61046927SAndroid Build Coastguard Worker 
816*61046927SAndroid Build Coastguard Worker       if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) {
817*61046927SAndroid Build Coastguard Worker          n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1;
818*61046927SAndroid Build Coastguard Worker 
819*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *op_p = ir3_instr_clone(n);
820*61046927SAndroid Build Coastguard Worker          op_p->flags = IR3_INSTR_P;
821*61046927SAndroid Build Coastguard Worker 
822*61046927SAndroid Build Coastguard Worker          ctx->so->need_full_quad = true;
823*61046927SAndroid Build Coastguard Worker       }
824*61046927SAndroid Build Coastguard Worker    }
825*61046927SAndroid Build Coastguard Worker 
826*61046927SAndroid Build Coastguard Worker    return true;
827*61046927SAndroid Build Coastguard Worker }
828*61046927SAndroid Build Coastguard Worker 
829*61046927SAndroid Build Coastguard Worker /* Some instructions can take a dummy destination of r63.x, which we model as it
830*61046927SAndroid Build Coastguard Worker  * not having a destination in the IR to avoid having special code to handle
831*61046927SAndroid Build Coastguard Worker  * this. Insert the dummy destination after everything else is done.
832*61046927SAndroid Build Coastguard Worker  */
833*61046927SAndroid Build Coastguard Worker static bool
expand_dummy_dests(struct ir3_block * block)834*61046927SAndroid Build Coastguard Worker expand_dummy_dests(struct ir3_block *block)
835*61046927SAndroid Build Coastguard Worker {
836*61046927SAndroid Build Coastguard Worker    foreach_instr (n, &block->instr_list) {
837*61046927SAndroid Build Coastguard Worker       if ((n->opc == OPC_SAM || n->opc == OPC_LDC || n->opc == OPC_RESINFO) &&
838*61046927SAndroid Build Coastguard Worker           n->dsts_count == 0) {
839*61046927SAndroid Build Coastguard Worker          struct ir3_register *dst = ir3_dst_create(n, INVALID_REG, 0);
840*61046927SAndroid Build Coastguard Worker          /* Copy the blob's writemask */
841*61046927SAndroid Build Coastguard Worker          if (n->opc == OPC_SAM)
842*61046927SAndroid Build Coastguard Worker             dst->wrmask = 0b1111;
843*61046927SAndroid Build Coastguard Worker       }
844*61046927SAndroid Build Coastguard Worker    }
845*61046927SAndroid Build Coastguard Worker    return true;
846*61046927SAndroid Build Coastguard Worker }
847*61046927SAndroid Build Coastguard Worker 
848*61046927SAndroid Build Coastguard Worker static void
apply_push_consts_load_macro(struct ir3_legalize_ctx * ctx,struct ir3_block * block)849*61046927SAndroid Build Coastguard Worker apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
850*61046927SAndroid Build Coastguard Worker                              struct ir3_block *block)
851*61046927SAndroid Build Coastguard Worker {
852*61046927SAndroid Build Coastguard Worker    foreach_instr (n, &block->instr_list) {
853*61046927SAndroid Build Coastguard Worker       if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
854*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *stsc = ir3_instr_create(block, OPC_STSC, 0, 2);
855*61046927SAndroid Build Coastguard Worker          ir3_instr_move_after(stsc, n);
856*61046927SAndroid Build Coastguard Worker          ir3_src_create(stsc, 0, IR3_REG_IMMED)->iim_val =
857*61046927SAndroid Build Coastguard Worker             n->push_consts.dst_base;
858*61046927SAndroid Build Coastguard Worker          ir3_src_create(stsc, 0, IR3_REG_IMMED)->iim_val =
859*61046927SAndroid Build Coastguard Worker             n->push_consts.src_base;
860*61046927SAndroid Build Coastguard Worker          stsc->cat6.iim_val = n->push_consts.src_size;
861*61046927SAndroid Build Coastguard Worker          stsc->cat6.type = TYPE_U32;
862*61046927SAndroid Build Coastguard Worker 
863*61046927SAndroid Build Coastguard Worker          if (ctx->compiler->stsc_duplication_quirk) {
864*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *nop = ir3_NOP(block);
865*61046927SAndroid Build Coastguard Worker             ir3_instr_move_after(nop, stsc);
866*61046927SAndroid Build Coastguard Worker             nop->flags |= IR3_INSTR_SS;
867*61046927SAndroid Build Coastguard Worker             ir3_instr_move_after(ir3_instr_clone(stsc), nop);
868*61046927SAndroid Build Coastguard Worker          }
869*61046927SAndroid Build Coastguard Worker 
870*61046927SAndroid Build Coastguard Worker          list_delinit(&n->node);
871*61046927SAndroid Build Coastguard Worker          break;
872*61046927SAndroid Build Coastguard Worker       } else if (!is_meta(n)) {
873*61046927SAndroid Build Coastguard Worker          break;
874*61046927SAndroid Build Coastguard Worker       }
875*61046927SAndroid Build Coastguard Worker    }
876*61046927SAndroid Build Coastguard Worker }
877*61046927SAndroid Build Coastguard Worker 
878*61046927SAndroid Build Coastguard Worker /* NOTE: branch instructions are always the last instruction(s)
879*61046927SAndroid Build Coastguard Worker  * in the block.  We take advantage of this as we resolve the
880*61046927SAndroid Build Coastguard Worker  * branches, since "if (foo) break;" constructs turn into
881*61046927SAndroid Build Coastguard Worker  * something like:
882*61046927SAndroid Build Coastguard Worker  *
883*61046927SAndroid Build Coastguard Worker  *   block3 {
884*61046927SAndroid Build Coastguard Worker  *   	...
885*61046927SAndroid Build Coastguard Worker  *   	0029:021: mov.s32s32 r62.x, r1.y
886*61046927SAndroid Build Coastguard Worker  *   	0082:022: br !p0.x, target=block5
887*61046927SAndroid Build Coastguard Worker  *   	0083:023: br p0.x, target=block4
888*61046927SAndroid Build Coastguard Worker  *   	// succs: if _[0029:021: mov.s32s32] block4; else block5;
889*61046927SAndroid Build Coastguard Worker  *   }
890*61046927SAndroid Build Coastguard Worker  *   block4 {
891*61046927SAndroid Build Coastguard Worker  *   	0084:024: jump, target=block6
892*61046927SAndroid Build Coastguard Worker  *   	// succs: block6;
893*61046927SAndroid Build Coastguard Worker  *   }
894*61046927SAndroid Build Coastguard Worker  *   block5 {
895*61046927SAndroid Build Coastguard Worker  *   	0085:025: jump, target=block7
896*61046927SAndroid Build Coastguard Worker  *   	// succs: block7;
897*61046927SAndroid Build Coastguard Worker  *   }
898*61046927SAndroid Build Coastguard Worker  *
899*61046927SAndroid Build Coastguard Worker  * ie. only instruction in block4/block5 is a jump, so when
900*61046927SAndroid Build Coastguard Worker  * resolving branches we can easily detect this by checking
901*61046927SAndroid Build Coastguard Worker  * that the first instruction in the target block is itself
902*61046927SAndroid Build Coastguard Worker  * a jump, and setup the br directly to the jump's target
903*61046927SAndroid Build Coastguard Worker  * (and strip back out the now unreached jump)
904*61046927SAndroid Build Coastguard Worker  *
905*61046927SAndroid Build Coastguard Worker  * TODO sometimes we end up with things like:
906*61046927SAndroid Build Coastguard Worker  *
907*61046927SAndroid Build Coastguard Worker  *    br !p0.x, #2
908*61046927SAndroid Build Coastguard Worker  *    br p0.x, #12
909*61046927SAndroid Build Coastguard Worker  *    add.u r0.y, r0.y, 1
910*61046927SAndroid Build Coastguard Worker  *
911*61046927SAndroid Build Coastguard Worker  * If we swapped the order of the branches, we could drop one.
912*61046927SAndroid Build Coastguard Worker  */
913*61046927SAndroid Build Coastguard Worker static struct ir3_block *
resolve_dest_block(struct ir3_block * block)914*61046927SAndroid Build Coastguard Worker resolve_dest_block(struct ir3_block *block)
915*61046927SAndroid Build Coastguard Worker {
916*61046927SAndroid Build Coastguard Worker    /* special case for last block: */
917*61046927SAndroid Build Coastguard Worker    if (!block->successors[0])
918*61046927SAndroid Build Coastguard Worker       return block;
919*61046927SAndroid Build Coastguard Worker 
920*61046927SAndroid Build Coastguard Worker    /* NOTE that we may or may not have inserted the jump
921*61046927SAndroid Build Coastguard Worker     * in the target block yet, so conditions to resolve
922*61046927SAndroid Build Coastguard Worker     * the dest to the dest block's successor are:
923*61046927SAndroid Build Coastguard Worker     *
924*61046927SAndroid Build Coastguard Worker     *   (1) successor[1] == NULL &&
925*61046927SAndroid Build Coastguard Worker     *   (2) (block-is-empty || only-instr-is-jump)
926*61046927SAndroid Build Coastguard Worker     */
927*61046927SAndroid Build Coastguard Worker    if (block->successors[1] == NULL) {
928*61046927SAndroid Build Coastguard Worker       if (list_is_empty(&block->instr_list)) {
929*61046927SAndroid Build Coastguard Worker          return block->successors[0];
930*61046927SAndroid Build Coastguard Worker       } else if (list_length(&block->instr_list) == 1) {
931*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *instr =
932*61046927SAndroid Build Coastguard Worker             list_first_entry(&block->instr_list, struct ir3_instruction, node);
933*61046927SAndroid Build Coastguard Worker          if (instr->opc == OPC_JUMP) {
934*61046927SAndroid Build Coastguard Worker             /* If this jump is backwards, then we will probably convert
935*61046927SAndroid Build Coastguard Worker              * the jump being resolved to a backwards jump, which will
936*61046927SAndroid Build Coastguard Worker              * change a loop-with-continue or loop-with-if into a
937*61046927SAndroid Build Coastguard Worker              * doubly-nested loop and change the convergence behavior.
938*61046927SAndroid Build Coastguard Worker              * Disallow this here.
939*61046927SAndroid Build Coastguard Worker              */
940*61046927SAndroid Build Coastguard Worker             if (block->successors[0]->index <= block->index)
941*61046927SAndroid Build Coastguard Worker                return block;
942*61046927SAndroid Build Coastguard Worker             return block->successors[0];
943*61046927SAndroid Build Coastguard Worker          }
944*61046927SAndroid Build Coastguard Worker       }
945*61046927SAndroid Build Coastguard Worker    }
946*61046927SAndroid Build Coastguard Worker    return block;
947*61046927SAndroid Build Coastguard Worker }
948*61046927SAndroid Build Coastguard Worker 
949*61046927SAndroid Build Coastguard Worker static void
remove_unused_block(struct ir3_block * old_target)950*61046927SAndroid Build Coastguard Worker remove_unused_block(struct ir3_block *old_target)
951*61046927SAndroid Build Coastguard Worker {
952*61046927SAndroid Build Coastguard Worker    list_delinit(&old_target->node);
953*61046927SAndroid Build Coastguard Worker 
954*61046927SAndroid Build Coastguard Worker    /* cleanup dangling predecessors: */
955*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ARRAY_SIZE(old_target->successors); i++) {
956*61046927SAndroid Build Coastguard Worker       if (old_target->successors[i]) {
957*61046927SAndroid Build Coastguard Worker          struct ir3_block *succ = old_target->successors[i];
958*61046927SAndroid Build Coastguard Worker          ir3_block_remove_predecessor(succ, old_target);
959*61046927SAndroid Build Coastguard Worker       }
960*61046927SAndroid Build Coastguard Worker    }
961*61046927SAndroid Build Coastguard Worker }
962*61046927SAndroid Build Coastguard Worker 
963*61046927SAndroid Build Coastguard Worker static bool
retarget_jump(struct ir3_instruction * instr,struct ir3_block * new_target)964*61046927SAndroid Build Coastguard Worker retarget_jump(struct ir3_instruction *instr, struct ir3_block *new_target)
965*61046927SAndroid Build Coastguard Worker {
966*61046927SAndroid Build Coastguard Worker    struct ir3_block *old_target = instr->cat0.target;
967*61046927SAndroid Build Coastguard Worker    struct ir3_block *cur_block = instr->block;
968*61046927SAndroid Build Coastguard Worker 
969*61046927SAndroid Build Coastguard Worker    /* update current blocks successors to reflect the retargetting: */
970*61046927SAndroid Build Coastguard Worker    if (cur_block->successors[0] == old_target) {
971*61046927SAndroid Build Coastguard Worker       cur_block->successors[0] = new_target;
972*61046927SAndroid Build Coastguard Worker    } else {
973*61046927SAndroid Build Coastguard Worker       assert(cur_block->successors[1] == old_target);
974*61046927SAndroid Build Coastguard Worker       cur_block->successors[1] = new_target;
975*61046927SAndroid Build Coastguard Worker    }
976*61046927SAndroid Build Coastguard Worker 
977*61046927SAndroid Build Coastguard Worker    /* update new target's predecessors: */
978*61046927SAndroid Build Coastguard Worker    ir3_block_add_predecessor(new_target, cur_block);
979*61046927SAndroid Build Coastguard Worker 
980*61046927SAndroid Build Coastguard Worker    /* and remove old_target's predecessor: */
981*61046927SAndroid Build Coastguard Worker    ir3_block_remove_predecessor(old_target, cur_block);
982*61046927SAndroid Build Coastguard Worker 
983*61046927SAndroid Build Coastguard Worker    instr->cat0.target = new_target;
984*61046927SAndroid Build Coastguard Worker 
985*61046927SAndroid Build Coastguard Worker    if (old_target->predecessors_count == 0) {
986*61046927SAndroid Build Coastguard Worker       remove_unused_block(old_target);
987*61046927SAndroid Build Coastguard Worker       return true;
988*61046927SAndroid Build Coastguard Worker    }
989*61046927SAndroid Build Coastguard Worker 
990*61046927SAndroid Build Coastguard Worker    return false;
991*61046927SAndroid Build Coastguard Worker }
992*61046927SAndroid Build Coastguard Worker 
993*61046927SAndroid Build Coastguard Worker static bool
is_invertible_branch(struct ir3_instruction * instr)994*61046927SAndroid Build Coastguard Worker is_invertible_branch(struct ir3_instruction *instr)
995*61046927SAndroid Build Coastguard Worker {
996*61046927SAndroid Build Coastguard Worker    switch (instr->opc) {
997*61046927SAndroid Build Coastguard Worker    case OPC_BR:
998*61046927SAndroid Build Coastguard Worker    case OPC_BRAA:
999*61046927SAndroid Build Coastguard Worker    case OPC_BRAO:
1000*61046927SAndroid Build Coastguard Worker    case OPC_BANY:
1001*61046927SAndroid Build Coastguard Worker    case OPC_BALL:
1002*61046927SAndroid Build Coastguard Worker       return true;
1003*61046927SAndroid Build Coastguard Worker    default:
1004*61046927SAndroid Build Coastguard Worker       return false;
1005*61046927SAndroid Build Coastguard Worker    }
1006*61046927SAndroid Build Coastguard Worker }
1007*61046927SAndroid Build Coastguard Worker 
1008*61046927SAndroid Build Coastguard Worker static bool
opt_jump(struct ir3 * ir)1009*61046927SAndroid Build Coastguard Worker opt_jump(struct ir3 *ir)
1010*61046927SAndroid Build Coastguard Worker {
1011*61046927SAndroid Build Coastguard Worker    bool progress = false;
1012*61046927SAndroid Build Coastguard Worker 
1013*61046927SAndroid Build Coastguard Worker    unsigned index = 0;
1014*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list)
1015*61046927SAndroid Build Coastguard Worker       block->index = index++;
1016*61046927SAndroid Build Coastguard Worker 
1017*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1018*61046927SAndroid Build Coastguard Worker       /* This pass destroys the physical CFG so don't keep it around to avoid
1019*61046927SAndroid Build Coastguard Worker        * validation errors.
1020*61046927SAndroid Build Coastguard Worker        */
1021*61046927SAndroid Build Coastguard Worker       block->physical_successors_count = 0;
1022*61046927SAndroid Build Coastguard Worker       block->physical_predecessors_count = 0;
1023*61046927SAndroid Build Coastguard Worker 
1024*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
1025*61046927SAndroid Build Coastguard Worker          if (!is_flow(instr) || !instr->cat0.target)
1026*61046927SAndroid Build Coastguard Worker             continue;
1027*61046927SAndroid Build Coastguard Worker 
1028*61046927SAndroid Build Coastguard Worker          struct ir3_block *tblock = resolve_dest_block(instr->cat0.target);
1029*61046927SAndroid Build Coastguard Worker          if (tblock != instr->cat0.target) {
1030*61046927SAndroid Build Coastguard Worker             progress = true;
1031*61046927SAndroid Build Coastguard Worker 
1032*61046927SAndroid Build Coastguard Worker             /* Exit early if we deleted a block to avoid iterator
1033*61046927SAndroid Build Coastguard Worker              * weirdness/assert fails
1034*61046927SAndroid Build Coastguard Worker              */
1035*61046927SAndroid Build Coastguard Worker             if (retarget_jump(instr, tblock))
1036*61046927SAndroid Build Coastguard Worker                return true;
1037*61046927SAndroid Build Coastguard Worker          }
1038*61046927SAndroid Build Coastguard Worker       }
1039*61046927SAndroid Build Coastguard Worker 
1040*61046927SAndroid Build Coastguard Worker       /* Detect the case where the block ends either with:
1041*61046927SAndroid Build Coastguard Worker        * - A single unconditional jump to the next block.
1042*61046927SAndroid Build Coastguard Worker        * - Two jump instructions with opposite conditions, and one of the
1043*61046927SAndroid Build Coastguard Worker        *   them jumps to the next block.
1044*61046927SAndroid Build Coastguard Worker        * We can remove the one that jumps to the next block in either case.
1045*61046927SAndroid Build Coastguard Worker        */
1046*61046927SAndroid Build Coastguard Worker       if (list_is_empty(&block->instr_list))
1047*61046927SAndroid Build Coastguard Worker          continue;
1048*61046927SAndroid Build Coastguard Worker 
1049*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *jumps[2] = {NULL, NULL};
1050*61046927SAndroid Build Coastguard Worker       jumps[0] =
1051*61046927SAndroid Build Coastguard Worker          list_last_entry(&block->instr_list, struct ir3_instruction, node);
1052*61046927SAndroid Build Coastguard Worker       if (!list_is_singular(&block->instr_list))
1053*61046927SAndroid Build Coastguard Worker          jumps[1] =
1054*61046927SAndroid Build Coastguard Worker             list_last_entry(&jumps[0]->node, struct ir3_instruction, node);
1055*61046927SAndroid Build Coastguard Worker 
1056*61046927SAndroid Build Coastguard Worker       if (jumps[0]->opc == OPC_JUMP)
1057*61046927SAndroid Build Coastguard Worker          jumps[1] = NULL;
1058*61046927SAndroid Build Coastguard Worker       else if (!is_invertible_branch(jumps[0]) || !jumps[1] ||
1059*61046927SAndroid Build Coastguard Worker                !is_invertible_branch(jumps[1])) {
1060*61046927SAndroid Build Coastguard Worker          continue;
1061*61046927SAndroid Build Coastguard Worker       }
1062*61046927SAndroid Build Coastguard Worker 
1063*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 2; i++) {
1064*61046927SAndroid Build Coastguard Worker          if (!jumps[i])
1065*61046927SAndroid Build Coastguard Worker             continue;
1066*61046927SAndroid Build Coastguard Worker          struct ir3_block *tblock = jumps[i]->cat0.target;
1067*61046927SAndroid Build Coastguard Worker          if (&tblock->node == block->node.next) {
1068*61046927SAndroid Build Coastguard Worker             list_delinit(&jumps[i]->node);
1069*61046927SAndroid Build Coastguard Worker             progress = true;
1070*61046927SAndroid Build Coastguard Worker             break;
1071*61046927SAndroid Build Coastguard Worker          }
1072*61046927SAndroid Build Coastguard Worker       }
1073*61046927SAndroid Build Coastguard Worker    }
1074*61046927SAndroid Build Coastguard Worker 
1075*61046927SAndroid Build Coastguard Worker    return progress;
1076*61046927SAndroid Build Coastguard Worker }
1077*61046927SAndroid Build Coastguard Worker 
1078*61046927SAndroid Build Coastguard Worker static void
resolve_jumps(struct ir3 * ir)1079*61046927SAndroid Build Coastguard Worker resolve_jumps(struct ir3 *ir)
1080*61046927SAndroid Build Coastguard Worker {
1081*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list)
1082*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list)
1083*61046927SAndroid Build Coastguard Worker          if (is_flow(instr) && instr->cat0.target) {
1084*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *target = list_first_entry(
1085*61046927SAndroid Build Coastguard Worker                &instr->cat0.target->instr_list, struct ir3_instruction, node);
1086*61046927SAndroid Build Coastguard Worker 
1087*61046927SAndroid Build Coastguard Worker             instr->cat0.immed = (int)target->ip - (int)instr->ip;
1088*61046927SAndroid Build Coastguard Worker          }
1089*61046927SAndroid Build Coastguard Worker }
1090*61046927SAndroid Build Coastguard Worker 
1091*61046927SAndroid Build Coastguard Worker static void
mark_jp(struct ir3_block * block)1092*61046927SAndroid Build Coastguard Worker mark_jp(struct ir3_block *block)
1093*61046927SAndroid Build Coastguard Worker {
1094*61046927SAndroid Build Coastguard Worker    /* We only call this on the end block (in kill_sched) or after retargeting
1095*61046927SAndroid Build Coastguard Worker     * all jumps to empty blocks (in mark_xvergence_points) so there's no need to
1096*61046927SAndroid Build Coastguard Worker     * worry about empty blocks.
1097*61046927SAndroid Build Coastguard Worker     */
1098*61046927SAndroid Build Coastguard Worker    assert(!list_is_empty(&block->instr_list));
1099*61046927SAndroid Build Coastguard Worker 
1100*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *target =
1101*61046927SAndroid Build Coastguard Worker       list_first_entry(&block->instr_list, struct ir3_instruction, node);
1102*61046927SAndroid Build Coastguard Worker    target->flags |= IR3_INSTR_JP;
1103*61046927SAndroid Build Coastguard Worker }
1104*61046927SAndroid Build Coastguard Worker 
1105*61046927SAndroid Build Coastguard Worker /* Mark points where control flow reconverges.
1106*61046927SAndroid Build Coastguard Worker  *
1107*61046927SAndroid Build Coastguard Worker  * Re-convergence points are where "parked" threads are reconverged with threads
1108*61046927SAndroid Build Coastguard Worker  * that took the opposite path last time around. We already calculated them, we
1109*61046927SAndroid Build Coastguard Worker  * just need to mark them with (jp).
1110*61046927SAndroid Build Coastguard Worker  */
1111*61046927SAndroid Build Coastguard Worker static void
mark_xvergence_points(struct ir3 * ir)1112*61046927SAndroid Build Coastguard Worker mark_xvergence_points(struct ir3 *ir)
1113*61046927SAndroid Build Coastguard Worker {
1114*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1115*61046927SAndroid Build Coastguard Worker       if (block->reconvergence_point)
1116*61046927SAndroid Build Coastguard Worker          mark_jp(block);
1117*61046927SAndroid Build Coastguard Worker    }
1118*61046927SAndroid Build Coastguard Worker }
1119*61046927SAndroid Build Coastguard Worker 
1120*61046927SAndroid Build Coastguard Worker static void
invert_branch(struct ir3_instruction * branch)1121*61046927SAndroid Build Coastguard Worker invert_branch(struct ir3_instruction *branch)
1122*61046927SAndroid Build Coastguard Worker {
1123*61046927SAndroid Build Coastguard Worker    switch (branch->opc) {
1124*61046927SAndroid Build Coastguard Worker    case OPC_BR:
1125*61046927SAndroid Build Coastguard Worker       break;
1126*61046927SAndroid Build Coastguard Worker    case OPC_BALL:
1127*61046927SAndroid Build Coastguard Worker       branch->opc = OPC_BANY;
1128*61046927SAndroid Build Coastguard Worker       break;
1129*61046927SAndroid Build Coastguard Worker    case OPC_BANY:
1130*61046927SAndroid Build Coastguard Worker       branch->opc = OPC_BALL;
1131*61046927SAndroid Build Coastguard Worker       break;
1132*61046927SAndroid Build Coastguard Worker    case OPC_BRAA:
1133*61046927SAndroid Build Coastguard Worker       branch->opc = OPC_BRAO;
1134*61046927SAndroid Build Coastguard Worker       break;
1135*61046927SAndroid Build Coastguard Worker    case OPC_BRAO:
1136*61046927SAndroid Build Coastguard Worker       branch->opc = OPC_BRAA;
1137*61046927SAndroid Build Coastguard Worker       break;
1138*61046927SAndroid Build Coastguard Worker    default:
1139*61046927SAndroid Build Coastguard Worker       unreachable("can't get here");
1140*61046927SAndroid Build Coastguard Worker    }
1141*61046927SAndroid Build Coastguard Worker 
1142*61046927SAndroid Build Coastguard Worker    branch->cat0.inv1 = !branch->cat0.inv1;
1143*61046927SAndroid Build Coastguard Worker    branch->cat0.inv2 = !branch->cat0.inv2;
1144*61046927SAndroid Build Coastguard Worker    branch->cat0.target = branch->block->successors[1];
1145*61046927SAndroid Build Coastguard Worker }
1146*61046927SAndroid Build Coastguard Worker 
1147*61046927SAndroid Build Coastguard Worker /* Insert the branch/jump instructions for flow control between blocks.
1148*61046927SAndroid Build Coastguard Worker  * Initially this is done naively, without considering if the successor
1149*61046927SAndroid Build Coastguard Worker  * block immediately follows the current block (ie. so no jump required),
1150*61046927SAndroid Build Coastguard Worker  * but that is cleaned up in opt_jump().
1151*61046927SAndroid Build Coastguard Worker  */
1152*61046927SAndroid Build Coastguard Worker static void
block_sched(struct ir3 * ir)1153*61046927SAndroid Build Coastguard Worker block_sched(struct ir3 *ir)
1154*61046927SAndroid Build Coastguard Worker {
1155*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1156*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1157*61046927SAndroid Build Coastguard Worker 
1158*61046927SAndroid Build Coastguard Worker       if (block->successors[1]) {
1159*61046927SAndroid Build Coastguard Worker          /* if/else, conditional branches to "then" or "else": */
1160*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *br1, *br2;
1161*61046927SAndroid Build Coastguard Worker 
1162*61046927SAndroid Build Coastguard Worker          assert(terminator);
1163*61046927SAndroid Build Coastguard Worker          unsigned opc = terminator->opc;
1164*61046927SAndroid Build Coastguard Worker 
1165*61046927SAndroid Build Coastguard Worker          if (opc == OPC_GETONE || opc == OPC_SHPS || opc == OPC_GETLAST) {
1166*61046927SAndroid Build Coastguard Worker             /* getone/shps can't be inverted, and it wouldn't even make sense
1167*61046927SAndroid Build Coastguard Worker              * to follow it with an inverted branch, so follow it by an
1168*61046927SAndroid Build Coastguard Worker              * unconditional branch.
1169*61046927SAndroid Build Coastguard Worker              */
1170*61046927SAndroid Build Coastguard Worker             assert(terminator->srcs_count == 0);
1171*61046927SAndroid Build Coastguard Worker             br1 = terminator;
1172*61046927SAndroid Build Coastguard Worker             br1->cat0.target = block->successors[1];
1173*61046927SAndroid Build Coastguard Worker 
1174*61046927SAndroid Build Coastguard Worker             br2 = ir3_JUMP(block);
1175*61046927SAndroid Build Coastguard Worker             br2->cat0.target = block->successors[0];
1176*61046927SAndroid Build Coastguard Worker          } else if (opc == OPC_BR || opc == OPC_BRAA || opc == OPC_BRAO ||
1177*61046927SAndroid Build Coastguard Worker                     opc == OPC_BALL || opc == OPC_BANY) {
1178*61046927SAndroid Build Coastguard Worker             /* create "else" branch first (since "then" block should
1179*61046927SAndroid Build Coastguard Worker              * frequently/always end up being a fall-thru):
1180*61046927SAndroid Build Coastguard Worker              */
1181*61046927SAndroid Build Coastguard Worker             br1 = terminator;
1182*61046927SAndroid Build Coastguard Worker             br2 = ir3_instr_clone(br1);
1183*61046927SAndroid Build Coastguard Worker             invert_branch(br1);
1184*61046927SAndroid Build Coastguard Worker             br2->cat0.target = block->successors[0];
1185*61046927SAndroid Build Coastguard Worker          } else {
1186*61046927SAndroid Build Coastguard Worker             assert(opc == OPC_PREDT || opc == OPC_PREDF);
1187*61046927SAndroid Build Coastguard Worker 
1188*61046927SAndroid Build Coastguard Worker             /* Handled by prede_sched. */
1189*61046927SAndroid Build Coastguard Worker             terminator->cat0.target = block->successors[0];
1190*61046927SAndroid Build Coastguard Worker             continue;
1191*61046927SAndroid Build Coastguard Worker          }
1192*61046927SAndroid Build Coastguard Worker 
         /* Creating br2 caused it to be moved before the terminator br1, move
          * it back.
          */
1196*61046927SAndroid Build Coastguard Worker          ir3_instr_move_after(br2, br1);
1197*61046927SAndroid Build Coastguard Worker       } else if (block->successors[0]) {
1198*61046927SAndroid Build Coastguard Worker          /* otherwise unconditional jump or predt/predf to next block which
1199*61046927SAndroid Build Coastguard Worker           * should already have been inserted.
1200*61046927SAndroid Build Coastguard Worker           */
1201*61046927SAndroid Build Coastguard Worker          assert(terminator);
1202*61046927SAndroid Build Coastguard Worker          assert(terminator->opc == OPC_JUMP || terminator->opc == OPC_PREDT ||
1203*61046927SAndroid Build Coastguard Worker                 terminator->opc == OPC_PREDF);
1204*61046927SAndroid Build Coastguard Worker          terminator->cat0.target = block->successors[0];
1205*61046927SAndroid Build Coastguard Worker       }
1206*61046927SAndroid Build Coastguard Worker    }
1207*61046927SAndroid Build Coastguard Worker }
1208*61046927SAndroid Build Coastguard Worker 
1209*61046927SAndroid Build Coastguard Worker static void
prede_sched(struct ir3 * ir)1210*61046927SAndroid Build Coastguard Worker prede_sched(struct ir3 *ir)
1211*61046927SAndroid Build Coastguard Worker {
1212*61046927SAndroid Build Coastguard Worker    unsigned index = 0;
1213*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list)
1214*61046927SAndroid Build Coastguard Worker       block->index = index++;
1215*61046927SAndroid Build Coastguard Worker 
1216*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1217*61046927SAndroid Build Coastguard Worker       /* Look for the following pattern generated by NIR lowering. The numbers
1218*61046927SAndroid Build Coastguard Worker        * at the top of blocks are their index.
1219*61046927SAndroid Build Coastguard Worker        *        |--- i ----|
1220*61046927SAndroid Build Coastguard Worker        *        |   ...    |
1221*61046927SAndroid Build Coastguard Worker        *        | pred[tf] |
1222*61046927SAndroid Build Coastguard Worker        *        |----------|
1223*61046927SAndroid Build Coastguard Worker        *      succ0 /   \ succ1
1224*61046927SAndroid Build Coastguard Worker        * |-- i+1 ---| |-- i+2 ---|
1225*61046927SAndroid Build Coastguard Worker        * |    ...   | |   ...    |
1226*61046927SAndroid Build Coastguard Worker        * | pred[ft] | |   ...    |
1227*61046927SAndroid Build Coastguard Worker        * |----------| |----------|
1228*61046927SAndroid Build Coastguard Worker        *     succ0 \   / succ0
1229*61046927SAndroid Build Coastguard Worker        *        |--- j ----|
1230*61046927SAndroid Build Coastguard Worker        *        |   ...    |
1231*61046927SAndroid Build Coastguard Worker        *        |----------|
1232*61046927SAndroid Build Coastguard Worker        */
1233*61046927SAndroid Build Coastguard Worker       struct ir3_block *succ0 = block->successors[0];
1234*61046927SAndroid Build Coastguard Worker       struct ir3_block *succ1 = block->successors[1];
1235*61046927SAndroid Build Coastguard Worker 
1236*61046927SAndroid Build Coastguard Worker       if (!succ1)
1237*61046927SAndroid Build Coastguard Worker          continue;
1238*61046927SAndroid Build Coastguard Worker 
1239*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1240*61046927SAndroid Build Coastguard Worker       if (!terminator)
1241*61046927SAndroid Build Coastguard Worker          continue;
1242*61046927SAndroid Build Coastguard Worker       if (terminator->opc != OPC_PREDT && terminator->opc != OPC_PREDF)
1243*61046927SAndroid Build Coastguard Worker          continue;
1244*61046927SAndroid Build Coastguard Worker 
1245*61046927SAndroid Build Coastguard Worker       assert(!succ0->successors[1] && !succ1->successors[1]);
1246*61046927SAndroid Build Coastguard Worker       assert(succ0->successors[0] == succ1->successors[0]);
1247*61046927SAndroid Build Coastguard Worker       assert(succ0->predecessors_count == 1 && succ1->predecessors_count == 1);
1248*61046927SAndroid Build Coastguard Worker       assert(succ0->index == (block->index + 1));
1249*61046927SAndroid Build Coastguard Worker       assert(succ1->index == (block->index + 2));
1250*61046927SAndroid Build Coastguard Worker 
1251*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *succ0_terminator =
1252*61046927SAndroid Build Coastguard Worker          ir3_block_get_terminator(succ0);
1253*61046927SAndroid Build Coastguard Worker       assert(succ0_terminator);
1254*61046927SAndroid Build Coastguard Worker       assert(succ0_terminator->opc ==
1255*61046927SAndroid Build Coastguard Worker              (terminator->opc == OPC_PREDT ? OPC_PREDF : OPC_PREDT));
1256*61046927SAndroid Build Coastguard Worker 
1257*61046927SAndroid Build Coastguard Worker       ASSERTED struct ir3_instruction *succ1_terminator =
1258*61046927SAndroid Build Coastguard Worker          ir3_block_get_terminator(succ1);
1259*61046927SAndroid Build Coastguard Worker       assert(!succ1_terminator || (succ1_terminator->opc == OPC_JUMP));
1260*61046927SAndroid Build Coastguard Worker 
1261*61046927SAndroid Build Coastguard Worker       /* Simple case: both successors contain instructions. Keep both blocks and
1262*61046927SAndroid Build Coastguard Worker        * insert prede before the second successor's terminator:
1263*61046927SAndroid Build Coastguard Worker        *        |--- i ----|
1264*61046927SAndroid Build Coastguard Worker        *        |   ...    |
1265*61046927SAndroid Build Coastguard Worker        *        | pred[tf] |
1266*61046927SAndroid Build Coastguard Worker        *        |----------|
1267*61046927SAndroid Build Coastguard Worker        *      succ0 /   \ succ1
1268*61046927SAndroid Build Coastguard Worker        * |-- i+1 ---| |-- i+2 ---|
1269*61046927SAndroid Build Coastguard Worker        * |    ...   | |   ...    |
1270*61046927SAndroid Build Coastguard Worker        * | pred[ft] | | prede    |
1271*61046927SAndroid Build Coastguard Worker        * |----------| |----------|
1272*61046927SAndroid Build Coastguard Worker        *     succ0 \   / succ0
1273*61046927SAndroid Build Coastguard Worker        *        |--- j ----|
1274*61046927SAndroid Build Coastguard Worker        *        |   ...    |
1275*61046927SAndroid Build Coastguard Worker        *        |----------|
1276*61046927SAndroid Build Coastguard Worker        */
1277*61046927SAndroid Build Coastguard Worker       if (!list_is_empty(&succ1->instr_list)) {
1278*61046927SAndroid Build Coastguard Worker          ir3_PREDE(succ1);
1279*61046927SAndroid Build Coastguard Worker          continue;
1280*61046927SAndroid Build Coastguard Worker       }
1281*61046927SAndroid Build Coastguard Worker 
1282*61046927SAndroid Build Coastguard Worker       /* Second successor is empty so we can remove it:
1283*61046927SAndroid Build Coastguard Worker        *        |--- i ----|
1284*61046927SAndroid Build Coastguard Worker        *        |   ...    |
1285*61046927SAndroid Build Coastguard Worker        *        | pred[tf] |
1286*61046927SAndroid Build Coastguard Worker        *        |----------|
1287*61046927SAndroid Build Coastguard Worker        *      succ0 /   \ succ1
1288*61046927SAndroid Build Coastguard Worker        * |-- i+1 ---|   |
1289*61046927SAndroid Build Coastguard Worker        * |    ...   |   |
1290*61046927SAndroid Build Coastguard Worker        * |   prede  |   |
1291*61046927SAndroid Build Coastguard Worker        * |----------|   |
1292*61046927SAndroid Build Coastguard Worker        *     succ0 \    /
1293*61046927SAndroid Build Coastguard Worker        *        |--- j ----|
1294*61046927SAndroid Build Coastguard Worker        *        |   ...    |
1295*61046927SAndroid Build Coastguard Worker        *        |----------|
1296*61046927SAndroid Build Coastguard Worker        */
1297*61046927SAndroid Build Coastguard Worker       list_delinit(&succ0_terminator->node);
1298*61046927SAndroid Build Coastguard Worker       ir3_PREDE(succ0);
1299*61046927SAndroid Build Coastguard Worker       remove_unused_block(succ1);
1300*61046927SAndroid Build Coastguard Worker       block->successors[1] = succ0->successors[0];
1301*61046927SAndroid Build Coastguard Worker       ir3_block_add_predecessor(succ0->successors[0], block);
1302*61046927SAndroid Build Coastguard Worker    }
1303*61046927SAndroid Build Coastguard Worker }
1304*61046927SAndroid Build Coastguard Worker 
1305*61046927SAndroid Build Coastguard Worker /* Here we workaround the fact that kill doesn't actually kill the thread as
1306*61046927SAndroid Build Coastguard Worker  * GL expects. The last instruction always needs to be an end instruction,
1307*61046927SAndroid Build Coastguard Worker  * which means that if we're stuck in a loop where kill is the only way out,
1308*61046927SAndroid Build Coastguard Worker  * then we may have to jump out to the end. kill may also have the d3d
1309*61046927SAndroid Build Coastguard Worker  * semantics of converting the thread to a helper thread, rather than setting
1310*61046927SAndroid Build Coastguard Worker  * the exec mask to 0, in which case the helper thread could get stuck in an
1311*61046927SAndroid Build Coastguard Worker  * infinite loop.
1312*61046927SAndroid Build Coastguard Worker  *
1313*61046927SAndroid Build Coastguard Worker  * We do this late, both to give the scheduler the opportunity to reschedule
1314*61046927SAndroid Build Coastguard Worker  * kill instructions earlier and to avoid having to create a separate basic
1315*61046927SAndroid Build Coastguard Worker  * block.
1316*61046927SAndroid Build Coastguard Worker  *
1317*61046927SAndroid Build Coastguard Worker  * TODO: Assuming that the wavefront doesn't stop as soon as all threads are
1318*61046927SAndroid Build Coastguard Worker  * killed, we might benefit by doing this more aggressively when the remaining
1319*61046927SAndroid Build Coastguard Worker  * part of the program after the kill is large, since that would let us
1320*61046927SAndroid Build Coastguard Worker  * skip over the instructions when there are no non-killed threads left.
1321*61046927SAndroid Build Coastguard Worker  */
1322*61046927SAndroid Build Coastguard Worker static void
kill_sched(struct ir3 * ir,struct ir3_shader_variant * so)1323*61046927SAndroid Build Coastguard Worker kill_sched(struct ir3 *ir, struct ir3_shader_variant *so)
1324*61046927SAndroid Build Coastguard Worker {
1325*61046927SAndroid Build Coastguard Worker    ir3_count_instructions(ir);
1326*61046927SAndroid Build Coastguard Worker 
1327*61046927SAndroid Build Coastguard Worker    /* True if we know that this block will always eventually lead to the end
1328*61046927SAndroid Build Coastguard Worker     * block:
1329*61046927SAndroid Build Coastguard Worker     */
1330*61046927SAndroid Build Coastguard Worker    bool always_ends = true;
1331*61046927SAndroid Build Coastguard Worker    bool added = false;
1332*61046927SAndroid Build Coastguard Worker    struct ir3_block *last_block =
1333*61046927SAndroid Build Coastguard Worker       list_last_entry(&ir->block_list, struct ir3_block, node);
1334*61046927SAndroid Build Coastguard Worker 
1335*61046927SAndroid Build Coastguard Worker    foreach_block_rev (block, &ir->block_list) {
1336*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 2 && block->successors[i]; i++) {
1337*61046927SAndroid Build Coastguard Worker          if (block->successors[i]->start_ip <= block->end_ip)
1338*61046927SAndroid Build Coastguard Worker             always_ends = false;
1339*61046927SAndroid Build Coastguard Worker       }
1340*61046927SAndroid Build Coastguard Worker 
1341*61046927SAndroid Build Coastguard Worker       if (always_ends)
1342*61046927SAndroid Build Coastguard Worker          continue;
1343*61046927SAndroid Build Coastguard Worker 
1344*61046927SAndroid Build Coastguard Worker       foreach_instr_safe (instr, &block->instr_list) {
1345*61046927SAndroid Build Coastguard Worker          if (instr->opc != OPC_KILL)
1346*61046927SAndroid Build Coastguard Worker             continue;
1347*61046927SAndroid Build Coastguard Worker 
1348*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *br = ir3_instr_create(block, OPC_BR, 0, 1);
1349*61046927SAndroid Build Coastguard Worker          ir3_src_create(br, instr->srcs[0]->num, instr->srcs[0]->flags)->wrmask =
1350*61046927SAndroid Build Coastguard Worker             1;
1351*61046927SAndroid Build Coastguard Worker          br->cat0.target =
1352*61046927SAndroid Build Coastguard Worker             list_last_entry(&ir->block_list, struct ir3_block, node);
1353*61046927SAndroid Build Coastguard Worker 
1354*61046927SAndroid Build Coastguard Worker          list_del(&br->node);
1355*61046927SAndroid Build Coastguard Worker          list_add(&br->node, &instr->node);
1356*61046927SAndroid Build Coastguard Worker 
1357*61046927SAndroid Build Coastguard Worker          added = true;
1358*61046927SAndroid Build Coastguard Worker       }
1359*61046927SAndroid Build Coastguard Worker    }
1360*61046927SAndroid Build Coastguard Worker 
1361*61046927SAndroid Build Coastguard Worker    if (added) {
1362*61046927SAndroid Build Coastguard Worker       /* I'm not entirely sure how the branchstack works, but we probably
1363*61046927SAndroid Build Coastguard Worker        * need to add at least one entry for the divergence which is resolved
1364*61046927SAndroid Build Coastguard Worker        * at the end:
1365*61046927SAndroid Build Coastguard Worker        */
1366*61046927SAndroid Build Coastguard Worker       so->branchstack++;
1367*61046927SAndroid Build Coastguard Worker 
1368*61046927SAndroid Build Coastguard Worker       /* We don't update predecessors/successors, so we have to do this
1369*61046927SAndroid Build Coastguard Worker        * manually:
1370*61046927SAndroid Build Coastguard Worker        */
1371*61046927SAndroid Build Coastguard Worker       mark_jp(last_block);
1372*61046927SAndroid Build Coastguard Worker    }
1373*61046927SAndroid Build Coastguard Worker }
1374*61046927SAndroid Build Coastguard Worker 
1375*61046927SAndroid Build Coastguard Worker static void
dbg_sync_sched(struct ir3 * ir,struct ir3_shader_variant * so)1376*61046927SAndroid Build Coastguard Worker dbg_sync_sched(struct ir3 *ir, struct ir3_shader_variant *so)
1377*61046927SAndroid Build Coastguard Worker {
1378*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1379*61046927SAndroid Build Coastguard Worker       foreach_instr_safe (instr, &block->instr_list) {
1380*61046927SAndroid Build Coastguard Worker          if (is_ss_producer(instr) || is_sy_producer(instr)) {
1381*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *nop = ir3_NOP(block);
1382*61046927SAndroid Build Coastguard Worker             nop->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
1383*61046927SAndroid Build Coastguard Worker             ir3_instr_move_after(nop, instr);
1384*61046927SAndroid Build Coastguard Worker          }
1385*61046927SAndroid Build Coastguard Worker       }
1386*61046927SAndroid Build Coastguard Worker    }
1387*61046927SAndroid Build Coastguard Worker }
1388*61046927SAndroid Build Coastguard Worker 
1389*61046927SAndroid Build Coastguard Worker static void
dbg_nop_sched(struct ir3 * ir,struct ir3_shader_variant * so)1390*61046927SAndroid Build Coastguard Worker dbg_nop_sched(struct ir3 *ir, struct ir3_shader_variant *so)
1391*61046927SAndroid Build Coastguard Worker {
1392*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1393*61046927SAndroid Build Coastguard Worker       foreach_instr_safe (instr, &block->instr_list) {
1394*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *nop = ir3_NOP(block);
1395*61046927SAndroid Build Coastguard Worker          nop->repeat = 5;
1396*61046927SAndroid Build Coastguard Worker          ir3_instr_move_before(nop, instr);
1397*61046927SAndroid Build Coastguard Worker       }
1398*61046927SAndroid Build Coastguard Worker    }
1399*61046927SAndroid Build Coastguard Worker }
1400*61046927SAndroid Build Coastguard Worker 
1401*61046927SAndroid Build Coastguard Worker static void
dbg_expand_rpt(struct ir3 * ir)1402*61046927SAndroid Build Coastguard Worker dbg_expand_rpt(struct ir3 *ir)
1403*61046927SAndroid Build Coastguard Worker {
1404*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1405*61046927SAndroid Build Coastguard Worker       foreach_instr_safe (instr, &block->instr_list) {
1406*61046927SAndroid Build Coastguard Worker          if (instr->repeat == 0 || instr->opc == OPC_NOP ||
1407*61046927SAndroid Build Coastguard Worker              instr->opc == OPC_SWZ || instr->opc == OPC_GAT ||
1408*61046927SAndroid Build Coastguard Worker              instr->opc == OPC_SCT) {
1409*61046927SAndroid Build Coastguard Worker             continue;
1410*61046927SAndroid Build Coastguard Worker          }
1411*61046927SAndroid Build Coastguard Worker 
1412*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i <= instr->repeat; ++i) {
1413*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *rpt = ir3_instr_clone(instr);
1414*61046927SAndroid Build Coastguard Worker             ir3_instr_move_before(rpt, instr);
1415*61046927SAndroid Build Coastguard Worker             rpt->repeat = 0;
1416*61046927SAndroid Build Coastguard Worker 
1417*61046927SAndroid Build Coastguard Worker             foreach_dst (dst, rpt) {
1418*61046927SAndroid Build Coastguard Worker                dst->num += i;
1419*61046927SAndroid Build Coastguard Worker                dst->wrmask = 1;
1420*61046927SAndroid Build Coastguard Worker             }
1421*61046927SAndroid Build Coastguard Worker 
1422*61046927SAndroid Build Coastguard Worker             foreach_src (src, rpt) {
1423*61046927SAndroid Build Coastguard Worker                if (!(src->flags & IR3_REG_R))
1424*61046927SAndroid Build Coastguard Worker                   continue;
1425*61046927SAndroid Build Coastguard Worker 
1426*61046927SAndroid Build Coastguard Worker                src->num += i;
1427*61046927SAndroid Build Coastguard Worker                src->uim_val += i;
1428*61046927SAndroid Build Coastguard Worker                src->wrmask = 1;
1429*61046927SAndroid Build Coastguard Worker                src->flags &= ~IR3_REG_R;
1430*61046927SAndroid Build Coastguard Worker             }
1431*61046927SAndroid Build Coastguard Worker          }
1432*61046927SAndroid Build Coastguard Worker 
1433*61046927SAndroid Build Coastguard Worker          list_delinit(&instr->node);
1434*61046927SAndroid Build Coastguard Worker       }
1435*61046927SAndroid Build Coastguard Worker    }
1436*61046927SAndroid Build Coastguard Worker }
1437*61046927SAndroid Build Coastguard Worker 
/* Per-block bookkeeping for the helper-invocation analysis: records whether
 * helper invocations may still be needed at the two ends of a block.
 */
struct ir3_helper_block_data {
   /* Set when helper invocations may be used on some path that starts at
    * the beginning of the block.
    */
   bool uses_helpers_beginning;

   /* Set when helper invocations may be used by the end of the block.
    * Branch instructions count as sitting "between" blocks, because (eq)
    * has to be inserted after them in the successor blocks. A branch
    * instruction that uses helpers therefore yields uses_helpers_end = true
    * for its own block.
    */
   bool uses_helpers_end;
};
1451*61046927SAndroid Build Coastguard Worker 
1452*61046927SAndroid Build Coastguard Worker /* Insert (eq) after the last instruction using the results of helper
1453*61046927SAndroid Build Coastguard Worker  * invocations. Use a backwards dataflow analysis to determine at which points
1454*61046927SAndroid Build Coastguard Worker  * in the program helper invocations are definitely never used, and then insert
1455*61046927SAndroid Build Coastguard Worker  * (eq) at the point where we cross from a point where they may be used to a
1456*61046927SAndroid Build Coastguard Worker  * point where they are never used.
1457*61046927SAndroid Build Coastguard Worker  */
1458*61046927SAndroid Build Coastguard Worker static void
helper_sched(struct ir3_legalize_ctx * ctx,struct ir3 * ir,struct ir3_shader_variant * so)1459*61046927SAndroid Build Coastguard Worker helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
1460*61046927SAndroid Build Coastguard Worker              struct ir3_shader_variant *so)
1461*61046927SAndroid Build Coastguard Worker {
1462*61046927SAndroid Build Coastguard Worker    bool non_prefetch_helpers = false;
1463*61046927SAndroid Build Coastguard Worker 
1464*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1465*61046927SAndroid Build Coastguard Worker       struct ir3_helper_block_data *bd =
1466*61046927SAndroid Build Coastguard Worker          rzalloc(ctx, struct ir3_helper_block_data);
1467*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
1468*61046927SAndroid Build Coastguard Worker          if (uses_helpers(instr)) {
1469*61046927SAndroid Build Coastguard Worker             bd->uses_helpers_beginning = true;
1470*61046927SAndroid Build Coastguard Worker             if (instr->opc != OPC_META_TEX_PREFETCH) {
1471*61046927SAndroid Build Coastguard Worker                non_prefetch_helpers = true;
1472*61046927SAndroid Build Coastguard Worker             }
1473*61046927SAndroid Build Coastguard Worker          }
1474*61046927SAndroid Build Coastguard Worker 
1475*61046927SAndroid Build Coastguard Worker          if (instr->opc == OPC_SHPE) {
1476*61046927SAndroid Build Coastguard Worker             /* (eq) is not allowed in preambles, mark the whole preamble as
1477*61046927SAndroid Build Coastguard Worker              * requiring helpers to avoid putting it there.
1478*61046927SAndroid Build Coastguard Worker              */
1479*61046927SAndroid Build Coastguard Worker             bd->uses_helpers_beginning = true;
1480*61046927SAndroid Build Coastguard Worker             bd->uses_helpers_end = true;
1481*61046927SAndroid Build Coastguard Worker          }
1482*61046927SAndroid Build Coastguard Worker       }
1483*61046927SAndroid Build Coastguard Worker 
1484*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1485*61046927SAndroid Build Coastguard Worker       if (terminator) {
1486*61046927SAndroid Build Coastguard Worker          if (terminator->opc == OPC_BALL || terminator->opc == OPC_BANY ||
1487*61046927SAndroid Build Coastguard Worker              (terminator->opc == OPC_GETONE &&
1488*61046927SAndroid Build Coastguard Worker               (terminator->flags & IR3_INSTR_NEEDS_HELPERS))) {
1489*61046927SAndroid Build Coastguard Worker             bd->uses_helpers_beginning = true;
1490*61046927SAndroid Build Coastguard Worker             bd->uses_helpers_end = true;
1491*61046927SAndroid Build Coastguard Worker             non_prefetch_helpers = true;
1492*61046927SAndroid Build Coastguard Worker          }
1493*61046927SAndroid Build Coastguard Worker       }
1494*61046927SAndroid Build Coastguard Worker 
1495*61046927SAndroid Build Coastguard Worker       block->data = bd;
1496*61046927SAndroid Build Coastguard Worker    }
1497*61046927SAndroid Build Coastguard Worker 
1498*61046927SAndroid Build Coastguard Worker    /* If only prefetches use helpers then we can disable them in the shader via
1499*61046927SAndroid Build Coastguard Worker     * a register setting.
1500*61046927SAndroid Build Coastguard Worker     */
1501*61046927SAndroid Build Coastguard Worker    if (!non_prefetch_helpers) {
1502*61046927SAndroid Build Coastguard Worker       so->prefetch_end_of_quad = true;
1503*61046927SAndroid Build Coastguard Worker       return;
1504*61046927SAndroid Build Coastguard Worker    }
1505*61046927SAndroid Build Coastguard Worker 
1506*61046927SAndroid Build Coastguard Worker    bool progress;
1507*61046927SAndroid Build Coastguard Worker    do {
1508*61046927SAndroid Build Coastguard Worker       progress = false;
1509*61046927SAndroid Build Coastguard Worker       foreach_block_rev (block, &ir->block_list) {
1510*61046927SAndroid Build Coastguard Worker          struct ir3_helper_block_data *bd = block->data;
1511*61046927SAndroid Build Coastguard Worker 
1512*61046927SAndroid Build Coastguard Worker          if (!bd->uses_helpers_beginning)
1513*61046927SAndroid Build Coastguard Worker             continue;
1514*61046927SAndroid Build Coastguard Worker 
1515*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
1516*61046927SAndroid Build Coastguard Worker             struct ir3_block *pred = block->physical_predecessors[i];
1517*61046927SAndroid Build Coastguard Worker             struct ir3_helper_block_data *pred_bd = pred->data;
1518*61046927SAndroid Build Coastguard Worker             if (!pred_bd->uses_helpers_end) {
1519*61046927SAndroid Build Coastguard Worker                pred_bd->uses_helpers_end = true;
1520*61046927SAndroid Build Coastguard Worker             }
1521*61046927SAndroid Build Coastguard Worker             if (!pred_bd->uses_helpers_beginning) {
1522*61046927SAndroid Build Coastguard Worker                pred_bd->uses_helpers_beginning = true;
1523*61046927SAndroid Build Coastguard Worker                progress = true;
1524*61046927SAndroid Build Coastguard Worker             }
1525*61046927SAndroid Build Coastguard Worker          }
1526*61046927SAndroid Build Coastguard Worker       }
1527*61046927SAndroid Build Coastguard Worker    } while (progress);
1528*61046927SAndroid Build Coastguard Worker 
1529*61046927SAndroid Build Coastguard Worker    /* Now, we need to determine the points where helper invocations become
1530*61046927SAndroid Build Coastguard Worker     * unused.
1531*61046927SAndroid Build Coastguard Worker     */
1532*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1533*61046927SAndroid Build Coastguard Worker       struct ir3_helper_block_data *bd = block->data;
1534*61046927SAndroid Build Coastguard Worker       if (bd->uses_helpers_end)
1535*61046927SAndroid Build Coastguard Worker          continue;
1536*61046927SAndroid Build Coastguard Worker 
1537*61046927SAndroid Build Coastguard Worker       /* We need to check the predecessors because of situations with critical
1538*61046927SAndroid Build Coastguard Worker        * edges like this that can occur after optimizing jumps:
1539*61046927SAndroid Build Coastguard Worker        *
1540*61046927SAndroid Build Coastguard Worker        *    br p0.x, #endif
1541*61046927SAndroid Build Coastguard Worker        *    ...
1542*61046927SAndroid Build Coastguard Worker        *    sam ...
1543*61046927SAndroid Build Coastguard Worker        *    ...
1544*61046927SAndroid Build Coastguard Worker        *    endif:
1545*61046927SAndroid Build Coastguard Worker        *    ...
1546*61046927SAndroid Build Coastguard Worker        *    end
1547*61046927SAndroid Build Coastguard Worker        *
1548*61046927SAndroid Build Coastguard Worker        * The endif block will have uses_helpers_beginning = false and
1549*61046927SAndroid Build Coastguard Worker        * uses_helpers_end = false, but because we jump to there from the
1550*61046927SAndroid Build Coastguard Worker        * beginning of the if where uses_helpers_end = true, we still want to
1551*61046927SAndroid Build Coastguard Worker        * add an (eq) at the beginning of the block:
1552*61046927SAndroid Build Coastguard Worker        *
1553*61046927SAndroid Build Coastguard Worker        *    br p0.x, #endif
1554*61046927SAndroid Build Coastguard Worker        *    ...
1555*61046927SAndroid Build Coastguard Worker        *    sam ...
1556*61046927SAndroid Build Coastguard Worker        *    (eq)nop
1557*61046927SAndroid Build Coastguard Worker        *    ...
1558*61046927SAndroid Build Coastguard Worker        *    endif:
1559*61046927SAndroid Build Coastguard Worker        *    (eq)nop
1560*61046927SAndroid Build Coastguard Worker        *    ...
1561*61046927SAndroid Build Coastguard Worker        *    end
1562*61046927SAndroid Build Coastguard Worker        *
       * This adds an extra nop in the case where the branch isn't taken, but
       * that's probably preferable to adding an extra jump instruction, which
       * is what would happen if we ran this pass before optimizing jumps:
1566*61046927SAndroid Build Coastguard Worker        *
1567*61046927SAndroid Build Coastguard Worker        *    br p0.x, #else
1568*61046927SAndroid Build Coastguard Worker        *    ...
1569*61046927SAndroid Build Coastguard Worker        *    sam ...
1570*61046927SAndroid Build Coastguard Worker        *    (eq)nop
1571*61046927SAndroid Build Coastguard Worker        *    ...
1572*61046927SAndroid Build Coastguard Worker        *    jump #endif
1573*61046927SAndroid Build Coastguard Worker        *    else:
1574*61046927SAndroid Build Coastguard Worker        *    (eq)nop
1575*61046927SAndroid Build Coastguard Worker        *    endif:
1576*61046927SAndroid Build Coastguard Worker        *    ...
1577*61046927SAndroid Build Coastguard Worker        *    end
1578*61046927SAndroid Build Coastguard Worker        *
1579*61046927SAndroid Build Coastguard Worker        * We also need this to make sure we insert (eq) after branches which use
1580*61046927SAndroid Build Coastguard Worker        * helper invocations.
1581*61046927SAndroid Build Coastguard Worker        */
1582*61046927SAndroid Build Coastguard Worker       bool pred_uses_helpers = bd->uses_helpers_beginning;
1583*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
1584*61046927SAndroid Build Coastguard Worker          struct ir3_block *pred = block->physical_predecessors[i];
1585*61046927SAndroid Build Coastguard Worker          struct ir3_helper_block_data *pred_bd = pred->data;
1586*61046927SAndroid Build Coastguard Worker          if (pred_bd->uses_helpers_end) {
1587*61046927SAndroid Build Coastguard Worker             pred_uses_helpers = true;
1588*61046927SAndroid Build Coastguard Worker             break;
1589*61046927SAndroid Build Coastguard Worker          }
1590*61046927SAndroid Build Coastguard Worker       }
1591*61046927SAndroid Build Coastguard Worker 
1592*61046927SAndroid Build Coastguard Worker       if (!pred_uses_helpers)
1593*61046927SAndroid Build Coastguard Worker          continue;
1594*61046927SAndroid Build Coastguard Worker 
1595*61046927SAndroid Build Coastguard Worker       /* The last use of helpers is somewhere between the beginning and the
1596*61046927SAndroid Build Coastguard Worker        * end. first_instr will be the first instruction where helpers are no
1597*61046927SAndroid Build Coastguard Worker        * longer required, or NULL if helpers are not required just at the end.
1598*61046927SAndroid Build Coastguard Worker        */
1599*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *first_instr = NULL;
1600*61046927SAndroid Build Coastguard Worker       foreach_instr_rev (instr, &block->instr_list) {
1601*61046927SAndroid Build Coastguard Worker          /* Skip prefetches because they actually execute before the block
1602*61046927SAndroid Build Coastguard Worker           * starts and at this stage they aren't guaranteed to be at the start
1603*61046927SAndroid Build Coastguard Worker           * of the block.
1604*61046927SAndroid Build Coastguard Worker           */
1605*61046927SAndroid Build Coastguard Worker          if (uses_helpers(instr) && instr->opc != OPC_META_TEX_PREFETCH)
1606*61046927SAndroid Build Coastguard Worker             break;
1607*61046927SAndroid Build Coastguard Worker          first_instr = instr;
1608*61046927SAndroid Build Coastguard Worker       }
1609*61046927SAndroid Build Coastguard Worker 
1610*61046927SAndroid Build Coastguard Worker       bool killed = false;
1611*61046927SAndroid Build Coastguard Worker       bool expensive_instruction_in_block = false;
1612*61046927SAndroid Build Coastguard Worker       if (first_instr) {
1613*61046927SAndroid Build Coastguard Worker          foreach_instr_from (instr, first_instr, &block->instr_list) {
1614*61046927SAndroid Build Coastguard Worker             /* If there's already a nop, we don't have to worry about whether to
1615*61046927SAndroid Build Coastguard Worker              * insert one.
1616*61046927SAndroid Build Coastguard Worker              */
1617*61046927SAndroid Build Coastguard Worker             if (instr->opc == OPC_NOP) {
1618*61046927SAndroid Build Coastguard Worker                instr->flags |= IR3_INSTR_EQ;
1619*61046927SAndroid Build Coastguard Worker                killed = true;
1620*61046927SAndroid Build Coastguard Worker                break;
1621*61046927SAndroid Build Coastguard Worker             }
1622*61046927SAndroid Build Coastguard Worker 
1623*61046927SAndroid Build Coastguard Worker             /* ALU and SFU instructions probably aren't going to benefit much
1624*61046927SAndroid Build Coastguard Worker              * from killing helper invocations, because they complete at least
1625*61046927SAndroid Build Coastguard Worker              * an entire quad in a cycle and don't access any quad-divergent
1626*61046927SAndroid Build Coastguard Worker              * memory, so delay emitting (eq) in the hopes that we find a nop
1627*61046927SAndroid Build Coastguard Worker              * afterwards.
1628*61046927SAndroid Build Coastguard Worker              */
1629*61046927SAndroid Build Coastguard Worker             if (is_alu(instr) || is_sfu(instr))
1630*61046927SAndroid Build Coastguard Worker                continue;
1631*61046927SAndroid Build Coastguard Worker             if (instr->opc == OPC_PREDE)
1632*61046927SAndroid Build Coastguard Worker                continue;
1633*61046927SAndroid Build Coastguard Worker 
1634*61046927SAndroid Build Coastguard Worker             expensive_instruction_in_block = true;
1635*61046927SAndroid Build Coastguard Worker             break;
1636*61046927SAndroid Build Coastguard Worker          }
1637*61046927SAndroid Build Coastguard Worker       }
1638*61046927SAndroid Build Coastguard Worker 
1639*61046927SAndroid Build Coastguard Worker       /* If this block isn't the last block before the end instruction, assume
1640*61046927SAndroid Build Coastguard Worker        * that there may be expensive instructions in later blocks so it's worth
1641*61046927SAndroid Build Coastguard Worker        * it to insert a nop.
1642*61046927SAndroid Build Coastguard Worker        */
1643*61046927SAndroid Build Coastguard Worker       if (!killed && (expensive_instruction_in_block ||
1644*61046927SAndroid Build Coastguard Worker                       block->successors[0] != ir3_end_block(ir))) {
1645*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *nop = ir3_NOP(block);
1646*61046927SAndroid Build Coastguard Worker          nop->flags |= IR3_INSTR_EQ;
1647*61046927SAndroid Build Coastguard Worker          if (first_instr)
1648*61046927SAndroid Build Coastguard Worker             ir3_instr_move_before(nop, first_instr);
1649*61046927SAndroid Build Coastguard Worker       }
1650*61046927SAndroid Build Coastguard Worker    }
1651*61046927SAndroid Build Coastguard Worker }
1652*61046927SAndroid Build Coastguard Worker 
1653*61046927SAndroid Build Coastguard Worker bool
ir3_legalize(struct ir3 * ir,struct ir3_shader_variant * so,int * max_bary)1654*61046927SAndroid Build Coastguard Worker ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
1655*61046927SAndroid Build Coastguard Worker {
1656*61046927SAndroid Build Coastguard Worker    struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
1657*61046927SAndroid Build Coastguard Worker    bool mergedregs = so->mergedregs;
1658*61046927SAndroid Build Coastguard Worker    bool progress;
1659*61046927SAndroid Build Coastguard Worker 
1660*61046927SAndroid Build Coastguard Worker    ctx->so = so;
1661*61046927SAndroid Build Coastguard Worker    ctx->max_bary = -1;
1662*61046927SAndroid Build Coastguard Worker    ctx->compiler = ir->compiler;
1663*61046927SAndroid Build Coastguard Worker    ctx->type = ir->type;
1664*61046927SAndroid Build Coastguard Worker 
1665*61046927SAndroid Build Coastguard Worker    /* allocate per-block data: */
1666*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1667*61046927SAndroid Build Coastguard Worker       struct ir3_legalize_block_data *bd =
1668*61046927SAndroid Build Coastguard Worker          rzalloc(ctx, struct ir3_legalize_block_data);
1669*61046927SAndroid Build Coastguard Worker 
1670*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss_war, mergedregs);
1671*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss_or_sy_war, mergedregs);
1672*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss_scalar_war, mergedregs);
1673*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss_or_sy_scalar_war, mergedregs);
1674*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss_scalar_full, mergedregs);
1675*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss_scalar_half, mergedregs);
1676*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_ss, mergedregs);
1677*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->state.needs_sy, mergedregs);
1678*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss_war, mergedregs);
1679*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss_or_sy_war, mergedregs);
1680*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss_scalar_war, mergedregs);
1681*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss_or_sy_scalar_war, mergedregs);
1682*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss_scalar_full, mergedregs);
1683*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss_scalar_half, mergedregs);
1684*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_ss, mergedregs);
1685*61046927SAndroid Build Coastguard Worker       regmask_init(&bd->begin_state.needs_sy, mergedregs);
1686*61046927SAndroid Build Coastguard Worker 
1687*61046927SAndroid Build Coastguard Worker       block->data = bd;
1688*61046927SAndroid Build Coastguard Worker    }
1689*61046927SAndroid Build Coastguard Worker 
1690*61046927SAndroid Build Coastguard Worker    /* We may have failed to pull all input loads into the first block.
1691*61046927SAndroid Build Coastguard Worker     * In such case at the moment we aren't able to find a better place
1692*61046927SAndroid Build Coastguard Worker     * to for (ei) than the end of the program.
1693*61046927SAndroid Build Coastguard Worker     * a5xx and a6xx do automatically release varying storage at the end.
1694*61046927SAndroid Build Coastguard Worker     */
1695*61046927SAndroid Build Coastguard Worker    ctx->early_input_release = true;
1696*61046927SAndroid Build Coastguard Worker 
1697*61046927SAndroid Build Coastguard Worker    struct ir3_block *start_block = ir3_after_preamble(ir);
1698*61046927SAndroid Build Coastguard Worker 
1699*61046927SAndroid Build Coastguard Worker    /* Gather information to determine whether we can enable early preamble.
1700*61046927SAndroid Build Coastguard Worker     */
1701*61046927SAndroid Build Coastguard Worker    bool gpr_in_preamble = false;
1702*61046927SAndroid Build Coastguard Worker    bool pred_in_preamble = false;
1703*61046927SAndroid Build Coastguard Worker    bool relative_in_preamble = false;
1704*61046927SAndroid Build Coastguard Worker    bool in_preamble = start_block != ir3_start_block(ir);
1705*61046927SAndroid Build Coastguard Worker    bool has_preamble = start_block != ir3_start_block(ir);
1706*61046927SAndroid Build Coastguard Worker 
1707*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1708*61046927SAndroid Build Coastguard Worker       if (block == start_block)
1709*61046927SAndroid Build Coastguard Worker          in_preamble = false;
1710*61046927SAndroid Build Coastguard Worker 
1711*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
1712*61046927SAndroid Build Coastguard Worker          if (is_input(instr)) {
1713*61046927SAndroid Build Coastguard Worker             ctx->has_inputs = true;
1714*61046927SAndroid Build Coastguard Worker             if (block != start_block) {
1715*61046927SAndroid Build Coastguard Worker                ctx->early_input_release = false;
1716*61046927SAndroid Build Coastguard Worker             }
1717*61046927SAndroid Build Coastguard Worker          }
1718*61046927SAndroid Build Coastguard Worker 
1719*61046927SAndroid Build Coastguard Worker          if (is_meta(instr))
1720*61046927SAndroid Build Coastguard Worker             continue;
1721*61046927SAndroid Build Coastguard Worker 
1722*61046927SAndroid Build Coastguard Worker          foreach_src (reg, instr) {
1723*61046927SAndroid Build Coastguard Worker             if (in_preamble) {
1724*61046927SAndroid Build Coastguard Worker                if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED)) &&
1725*61046927SAndroid Build Coastguard Worker                    is_reg_gpr(reg))
1726*61046927SAndroid Build Coastguard Worker                   gpr_in_preamble = true;
1727*61046927SAndroid Build Coastguard Worker                if (reg->flags & IR3_REG_RELATIV)
1728*61046927SAndroid Build Coastguard Worker                   relative_in_preamble = true;
1729*61046927SAndroid Build Coastguard Worker             }
1730*61046927SAndroid Build Coastguard Worker          }
1731*61046927SAndroid Build Coastguard Worker 
1732*61046927SAndroid Build Coastguard Worker          foreach_dst (reg, instr) {
1733*61046927SAndroid Build Coastguard Worker             if (is_dest_gpr(reg)) {
1734*61046927SAndroid Build Coastguard Worker                if (in_preamble) {
1735*61046927SAndroid Build Coastguard Worker                   if (!(reg->flags & IR3_REG_SHARED))
1736*61046927SAndroid Build Coastguard Worker                      gpr_in_preamble = true;
1737*61046927SAndroid Build Coastguard Worker                   if (reg->flags & IR3_REG_RELATIV)
1738*61046927SAndroid Build Coastguard Worker                      relative_in_preamble = true;
1739*61046927SAndroid Build Coastguard Worker                }
1740*61046927SAndroid Build Coastguard Worker             }
1741*61046927SAndroid Build Coastguard Worker          }
1742*61046927SAndroid Build Coastguard Worker 
1743*61046927SAndroid Build Coastguard Worker          if (in_preamble && writes_pred(instr)) {
1744*61046927SAndroid Build Coastguard Worker             pred_in_preamble = true;
1745*61046927SAndroid Build Coastguard Worker          }
1746*61046927SAndroid Build Coastguard Worker       }
1747*61046927SAndroid Build Coastguard Worker    }
1748*61046927SAndroid Build Coastguard Worker 
1749*61046927SAndroid Build Coastguard Worker    so->early_preamble = has_preamble && !gpr_in_preamble &&
1750*61046927SAndroid Build Coastguard Worker       !pred_in_preamble && !relative_in_preamble &&
1751*61046927SAndroid Build Coastguard Worker       ir->compiler->has_early_preamble &&
1752*61046927SAndroid Build Coastguard Worker       !(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE);
1753*61046927SAndroid Build Coastguard Worker 
1754*61046927SAndroid Build Coastguard Worker    /* On a7xx, sync behavior for a1.x is different in the early preamble. RaW
1755*61046927SAndroid Build Coastguard Worker     * dependencies must be synchronized with (ss) there must be an extra
1756*61046927SAndroid Build Coastguard Worker     * (r) on the source of the mova1 instruction.
1757*61046927SAndroid Build Coastguard Worker     */
1758*61046927SAndroid Build Coastguard Worker    if (so->early_preamble && ir->compiler->gen >= 7) {
1759*61046927SAndroid Build Coastguard Worker       foreach_block (block, &ir->block_list) {
1760*61046927SAndroid Build Coastguard Worker          if (block == start_block)
1761*61046927SAndroid Build Coastguard Worker             break;
1762*61046927SAndroid Build Coastguard Worker          block->in_early_preamble = true;
1763*61046927SAndroid Build Coastguard Worker       }
1764*61046927SAndroid Build Coastguard Worker    }
1765*61046927SAndroid Build Coastguard Worker 
1766*61046927SAndroid Build Coastguard Worker    assert(ctx->early_input_release || ctx->compiler->gen >= 5);
1767*61046927SAndroid Build Coastguard Worker 
1768*61046927SAndroid Build Coastguard Worker    if (ir3_shader_debug & IR3_DBG_EXPANDRPT) {
1769*61046927SAndroid Build Coastguard Worker       dbg_expand_rpt(ir);
1770*61046927SAndroid Build Coastguard Worker    }
1771*61046927SAndroid Build Coastguard Worker 
1772*61046927SAndroid Build Coastguard Worker    /* process each block: */
1773*61046927SAndroid Build Coastguard Worker    do {
1774*61046927SAndroid Build Coastguard Worker       progress = false;
1775*61046927SAndroid Build Coastguard Worker       foreach_block (block, &ir->block_list) {
1776*61046927SAndroid Build Coastguard Worker          progress |= legalize_block(ctx, block);
1777*61046927SAndroid Build Coastguard Worker       }
1778*61046927SAndroid Build Coastguard Worker    } while (progress);
1779*61046927SAndroid Build Coastguard Worker 
1780*61046927SAndroid Build Coastguard Worker    *max_bary = ctx->max_bary;
1781*61046927SAndroid Build Coastguard Worker 
1782*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1783*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *terminator = ir3_block_get_terminator(block);
1784*61046927SAndroid Build Coastguard Worker       if (terminator && terminator->opc == OPC_GETONE) {
1785*61046927SAndroid Build Coastguard Worker          apply_push_consts_load_macro(ctx, block->successors[0]);
1786*61046927SAndroid Build Coastguard Worker          break;
1787*61046927SAndroid Build Coastguard Worker       }
1788*61046927SAndroid Build Coastguard Worker    }
1789*61046927SAndroid Build Coastguard Worker 
1790*61046927SAndroid Build Coastguard Worker    block_sched(ir);
1791*61046927SAndroid Build Coastguard Worker 
1792*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1793*61046927SAndroid Build Coastguard Worker       progress |= apply_fine_deriv_macro(ctx, block);
1794*61046927SAndroid Build Coastguard Worker    }
1795*61046927SAndroid Build Coastguard Worker 
1796*61046927SAndroid Build Coastguard Worker    if (ir3_shader_debug & IR3_DBG_FULLSYNC) {
1797*61046927SAndroid Build Coastguard Worker       dbg_sync_sched(ir, so);
1798*61046927SAndroid Build Coastguard Worker    }
1799*61046927SAndroid Build Coastguard Worker 
1800*61046927SAndroid Build Coastguard Worker    if (ir3_shader_debug & IR3_DBG_FULLNOP) {
1801*61046927SAndroid Build Coastguard Worker       dbg_nop_sched(ir, so);
1802*61046927SAndroid Build Coastguard Worker    }
1803*61046927SAndroid Build Coastguard Worker 
1804*61046927SAndroid Build Coastguard Worker    bool cfg_changed = false;
1805*61046927SAndroid Build Coastguard Worker    while (opt_jump(ir))
1806*61046927SAndroid Build Coastguard Worker       cfg_changed = true;
1807*61046927SAndroid Build Coastguard Worker 
1808*61046927SAndroid Build Coastguard Worker    prede_sched(ir);
1809*61046927SAndroid Build Coastguard Worker 
1810*61046927SAndroid Build Coastguard Worker    if (cfg_changed)
1811*61046927SAndroid Build Coastguard Worker       ir3_calc_reconvergence(so);
1812*61046927SAndroid Build Coastguard Worker 
1813*61046927SAndroid Build Coastguard Worker    if (so->type == MESA_SHADER_FRAGMENT)
1814*61046927SAndroid Build Coastguard Worker       kill_sched(ir, so);
1815*61046927SAndroid Build Coastguard Worker 
1816*61046927SAndroid Build Coastguard Worker    /* TODO: does (eq) exist before a6xx? */
1817*61046927SAndroid Build Coastguard Worker    if (so->type == MESA_SHADER_FRAGMENT && so->need_pixlod &&
1818*61046927SAndroid Build Coastguard Worker        so->compiler->gen >= 6)
1819*61046927SAndroid Build Coastguard Worker       helper_sched(ctx, ir, so);
1820*61046927SAndroid Build Coastguard Worker 
1821*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
1822*61046927SAndroid Build Coastguard Worker       progress |= expand_dummy_dests(block);
1823*61046927SAndroid Build Coastguard Worker    }
1824*61046927SAndroid Build Coastguard Worker 
1825*61046927SAndroid Build Coastguard Worker    ir3_count_instructions(ir);
1826*61046927SAndroid Build Coastguard Worker    resolve_jumps(ir);
1827*61046927SAndroid Build Coastguard Worker 
1828*61046927SAndroid Build Coastguard Worker    mark_xvergence_points(ir);
1829*61046927SAndroid Build Coastguard Worker 
1830*61046927SAndroid Build Coastguard Worker    ralloc_free(ctx);
1831*61046927SAndroid Build Coastguard Worker 
1832*61046927SAndroid Build Coastguard Worker    return true;
1833*61046927SAndroid Build Coastguard Worker }
1834