xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_reconvergence.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2023 Valve Corporation
3*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker  */
5*61046927SAndroid Build Coastguard Worker 
6*61046927SAndroid Build Coastguard Worker /* The pass uses information on which branches are divergent in order to
7*61046927SAndroid Build Coastguard Worker  * determine which blocks are "reconvergence points" where parked threads may
8*61046927SAndroid Build Coastguard Worker  * become reactivated as well as to add "physical" edges where the machine may
9*61046927SAndroid Build Coastguard Worker  * fall through to the next reconvergence point. Reconvergence points need a
10*61046927SAndroid Build Coastguard Worker  * (jp) added in the assembly, and physical edges are needed to model shared
11*61046927SAndroid Build Coastguard Worker  * register liveness correctly. Reconvergence happens in the following two
12*61046927SAndroid Build Coastguard Worker  * scenarios:
13*61046927SAndroid Build Coastguard Worker  *
14*61046927SAndroid Build Coastguard Worker  * 1. When there is a divergent branch, the later of the two block destinations
15*61046927SAndroid Build Coastguard Worker  *    becomes a reconvergence point.
16*61046927SAndroid Build Coastguard Worker  * 2. When a forward edge crosses over a reconvergence point that may be
17*61046927SAndroid Build Coastguard Worker  *    outstanding at the start of the edge, we need to park the threads that
18*61046927SAndroid Build Coastguard Worker  *    take the edge and resume execution at the reconvergence point. This means
19*61046927SAndroid Build Coastguard Worker  *    that there is a physical edge from the start of the edge to the
20*61046927SAndroid Build Coastguard Worker  *    reconvergence point, and the destination of the edge becomes a new
21*61046927SAndroid Build Coastguard Worker  *    reconvergence point.
22*61046927SAndroid Build Coastguard Worker  *
23*61046927SAndroid Build Coastguard Worker  * For example, consider this simple if-else:
24*61046927SAndroid Build Coastguard Worker  *
25*61046927SAndroid Build Coastguard Worker  *    bb0:
26*61046927SAndroid Build Coastguard Worker  *    ...
27*61046927SAndroid Build Coastguard Worker  *    br p0.x, #bb1, #bb2
28*61046927SAndroid Build Coastguard Worker  *    bb1:
29*61046927SAndroid Build Coastguard Worker  *    ...
30*61046927SAndroid Build Coastguard Worker  *    jump bb3
31*61046927SAndroid Build Coastguard Worker  *    bb2:
32*61046927SAndroid Build Coastguard Worker  *    ...
33*61046927SAndroid Build Coastguard Worker  *    jump bb3
34*61046927SAndroid Build Coastguard Worker  *    bb3:
35*61046927SAndroid Build Coastguard Worker  *    ...
36*61046927SAndroid Build Coastguard Worker  *
37*61046927SAndroid Build Coastguard Worker  * The divergent branch at the end of bb0 makes bb2 a reconvergence point
38*61046927SAndroid Build Coastguard Worker  * following (1), which starts being outstanding after the branch at the end of
39*61046927SAndroid Build Coastguard Worker  * bb1. The jump to bb3 at the end of bb1 goes over bb2 while it is outstanding,
40*61046927SAndroid Build Coastguard Worker  * so there is a physical edge from bb1 to bb2 and bb3 is a reconvergence point
41*61046927SAndroid Build Coastguard Worker  * following (2).
42*61046927SAndroid Build Coastguard Worker  *
43*61046927SAndroid Build Coastguard Worker  * Note that (2) can apply recursively. To handle this efficiently we build an
44*61046927SAndroid Build Coastguard Worker  * interval tree of forward edges that cross other blocks and whenever a block
45*61046927SAndroid Build Coastguard Worker  * becomes a RP we iterate through the edges jumping across it using the tree.
46*61046927SAndroid Build Coastguard Worker  * We also need to keep track of the range where each RP may be
47*61046927SAndroid Build Coastguard Worker  * "outstanding." A RP becomes outstanding after a branch to it parks its
48*61046927SAndroid Build Coastguard Worker  * threads there. This range may increase in size as we discover more and more
49*61046927SAndroid Build Coastguard Worker  * branches to it that may park their threads there.
50*61046927SAndroid Build Coastguard Worker  *
51*61046927SAndroid Build Coastguard Worker  * Finally, we need to compute the branchstack value, which is the maximum
52*61046927SAndroid Build Coastguard Worker  * number of outstanding reconvergence points. For the if-else, the branchstack
53*61046927SAndroid Build Coastguard Worker  * is 2, because after the jump at the end of bb2 both reconvergence points are
54*61046927SAndroid Build Coastguard Worker  * outstanding (although the first is removed immediately afterwards). Because
55*61046927SAndroid Build Coastguard Worker  * we already computed the range where each RP is outstanding, this part is
56*61046927SAndroid Build Coastguard Worker  * relatively straightforward.
57*61046927SAndroid Build Coastguard Worker  */
58*61046927SAndroid Build Coastguard Worker 
59*61046927SAndroid Build Coastguard Worker #include <limits.h>
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker #include "ir3_shader.h"
62*61046927SAndroid Build Coastguard Worker 
63*61046927SAndroid Build Coastguard Worker #include "util/rb_tree.h"
64*61046927SAndroid Build Coastguard Worker #include "util/u_worklist.h"
65*61046927SAndroid Build Coastguard Worker #include "util/ralloc.h"
66*61046927SAndroid Build Coastguard Worker 
67*61046927SAndroid Build Coastguard Worker struct logical_edge {
68*61046927SAndroid Build Coastguard Worker    struct uinterval_node node;
69*61046927SAndroid Build Coastguard Worker    struct ir3_block *start_block;
70*61046927SAndroid Build Coastguard Worker    struct ir3_block *end_block;
71*61046927SAndroid Build Coastguard Worker };
72*61046927SAndroid Build Coastguard Worker 
73*61046927SAndroid Build Coastguard Worker struct block_data {
74*61046927SAndroid Build Coastguard Worker    /* For a reconvergance point, the index of the first block where, upon
75*61046927SAndroid Build Coastguard Worker     * exiting, the RP may be outstanding. Normally this is a predecessor but may
76*61046927SAndroid Build Coastguard Worker     * be a loop header for loops.
77*61046927SAndroid Build Coastguard Worker     */
78*61046927SAndroid Build Coastguard Worker    unsigned first_divergent_pred;
79*61046927SAndroid Build Coastguard Worker 
80*61046927SAndroid Build Coastguard Worker    /* The last processed first_divergent_pred. */
81*61046927SAndroid Build Coastguard Worker    unsigned first_processed_divergent_pred;
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker    /* The number of blocks that have this block as a first_divergent_pred. */
84*61046927SAndroid Build Coastguard Worker    unsigned divergence_count;
85*61046927SAndroid Build Coastguard Worker };
86*61046927SAndroid Build Coastguard Worker 
87*61046927SAndroid Build Coastguard Worker void
ir3_calc_reconvergence(struct ir3_shader_variant * so)88*61046927SAndroid Build Coastguard Worker ir3_calc_reconvergence(struct ir3_shader_variant *so)
89*61046927SAndroid Build Coastguard Worker {
90*61046927SAndroid Build Coastguard Worker    void *mem_ctx = ralloc_context(NULL);
91*61046927SAndroid Build Coastguard Worker 
92*61046927SAndroid Build Coastguard Worker    /* It's important that the index we use corresponds to the final order blocks
93*61046927SAndroid Build Coastguard Worker     * are emitted in!
94*61046927SAndroid Build Coastguard Worker     */
95*61046927SAndroid Build Coastguard Worker    unsigned index = 0;
96*61046927SAndroid Build Coastguard Worker    foreach_block (block, &so->ir->block_list) {
97*61046927SAndroid Build Coastguard Worker       block->index = index++;
98*61046927SAndroid Build Coastguard Worker    }
99*61046927SAndroid Build Coastguard Worker 
100*61046927SAndroid Build Coastguard Worker    /* Setup the tree of edges */
101*61046927SAndroid Build Coastguard Worker    unsigned edge_count = 0;
102*61046927SAndroid Build Coastguard Worker    foreach_block (block, &so->ir->block_list) {
103*61046927SAndroid Build Coastguard Worker       if (block->successors[0])
104*61046927SAndroid Build Coastguard Worker          edge_count++;
105*61046927SAndroid Build Coastguard Worker       if (block->successors[1])
106*61046927SAndroid Build Coastguard Worker          edge_count++;
107*61046927SAndroid Build Coastguard Worker 
108*61046927SAndroid Build Coastguard Worker       block->physical_predecessors_count = 0;
109*61046927SAndroid Build Coastguard Worker       block->physical_successors_count = 0;
110*61046927SAndroid Build Coastguard Worker       block->reconvergence_point = false;
111*61046927SAndroid Build Coastguard Worker    }
112*61046927SAndroid Build Coastguard Worker 
113*61046927SAndroid Build Coastguard Worker    struct rb_tree forward_edges, backward_edges;
114*61046927SAndroid Build Coastguard Worker    rb_tree_init(&forward_edges);
115*61046927SAndroid Build Coastguard Worker    rb_tree_init(&backward_edges);
116*61046927SAndroid Build Coastguard Worker 
117*61046927SAndroid Build Coastguard Worker    unsigned edge = 0;
118*61046927SAndroid Build Coastguard Worker    struct logical_edge *edges =
119*61046927SAndroid Build Coastguard Worker       ralloc_array(mem_ctx, struct logical_edge, edge_count);
120*61046927SAndroid Build Coastguard Worker    struct block_data *blocks =
121*61046927SAndroid Build Coastguard Worker       ralloc_array(mem_ctx, struct block_data, index);
122*61046927SAndroid Build Coastguard Worker    foreach_block (block, &so->ir->block_list) {
123*61046927SAndroid Build Coastguard Worker       blocks[block->index].divergence_count = 0;
124*61046927SAndroid Build Coastguard Worker       blocks[block->index].first_divergent_pred = UINT_MAX;
125*61046927SAndroid Build Coastguard Worker       blocks[block->index].first_processed_divergent_pred = UINT_MAX;
126*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
127*61046927SAndroid Build Coastguard Worker          if (block->successors[i]) {
128*61046927SAndroid Build Coastguard Worker             ir3_block_link_physical(block, block->successors[i]);
129*61046927SAndroid Build Coastguard Worker 
130*61046927SAndroid Build Coastguard Worker             if (block->successors[i]->index > block->index + 1) {
131*61046927SAndroid Build Coastguard Worker                edges[edge] = (struct logical_edge) {
132*61046927SAndroid Build Coastguard Worker                   .node = {
133*61046927SAndroid Build Coastguard Worker                      .interval = {
134*61046927SAndroid Build Coastguard Worker                         block->index + 1,
135*61046927SAndroid Build Coastguard Worker                         block->successors[i]->index - 1
136*61046927SAndroid Build Coastguard Worker                      },
137*61046927SAndroid Build Coastguard Worker                   },
138*61046927SAndroid Build Coastguard Worker                   .start_block = block,
139*61046927SAndroid Build Coastguard Worker                   .end_block = block->successors[i],
140*61046927SAndroid Build Coastguard Worker                };
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker                uinterval_tree_insert(&forward_edges, &edges[edge++].node);
143*61046927SAndroid Build Coastguard Worker             } else if (block->successors[i]->index <= block->index) {
144*61046927SAndroid Build Coastguard Worker                edges[edge] = (struct logical_edge) {
145*61046927SAndroid Build Coastguard Worker                   .node = {
146*61046927SAndroid Build Coastguard Worker                      .interval = {
147*61046927SAndroid Build Coastguard Worker                         block->successors[i]->index - 1,
148*61046927SAndroid Build Coastguard Worker                         block->index + 1
149*61046927SAndroid Build Coastguard Worker                      },
150*61046927SAndroid Build Coastguard Worker                   },
151*61046927SAndroid Build Coastguard Worker                   .start_block = block->successors[i],
152*61046927SAndroid Build Coastguard Worker                   .end_block = block,
153*61046927SAndroid Build Coastguard Worker                };
154*61046927SAndroid Build Coastguard Worker 
155*61046927SAndroid Build Coastguard Worker                uinterval_tree_insert(&backward_edges, &edges[edge++].node);
156*61046927SAndroid Build Coastguard Worker             }
157*61046927SAndroid Build Coastguard Worker          }
158*61046927SAndroid Build Coastguard Worker       }
159*61046927SAndroid Build Coastguard Worker    }
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker    assert(edge <= edge_count);
162*61046927SAndroid Build Coastguard Worker 
163*61046927SAndroid Build Coastguard Worker    u_worklist worklist;
164*61046927SAndroid Build Coastguard Worker    u_worklist_init(&worklist, index, mem_ctx);
165*61046927SAndroid Build Coastguard Worker 
166*61046927SAndroid Build Coastguard Worker    /* First, find and mark divergent branches. The later destination will be the
167*61046927SAndroid Build Coastguard Worker     * reconvergence point.
168*61046927SAndroid Build Coastguard Worker     */
169*61046927SAndroid Build Coastguard Worker    foreach_block (block, &so->ir->block_list) {
170*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *terminator = ir3_block_get_terminator(block);
171*61046927SAndroid Build Coastguard Worker       if (!terminator)
172*61046927SAndroid Build Coastguard Worker          continue;
173*61046927SAndroid Build Coastguard Worker       if (terminator->opc == OPC_PREDT || terminator->opc == OPC_PREDF)
174*61046927SAndroid Build Coastguard Worker          continue;
175*61046927SAndroid Build Coastguard Worker       if (block->successors[0] && block->successors[1] &&
176*61046927SAndroid Build Coastguard Worker           block->divergent_condition) {
177*61046927SAndroid Build Coastguard Worker          struct ir3_block *reconv_points[2];
178*61046927SAndroid Build Coastguard Worker          unsigned num_reconv_points;
179*61046927SAndroid Build Coastguard Worker          struct ir3_instruction *prev_instr = NULL;
180*61046927SAndroid Build Coastguard Worker 
181*61046927SAndroid Build Coastguard Worker          if (!list_is_singular(&block->instr_list)) {
182*61046927SAndroid Build Coastguard Worker             prev_instr =
183*61046927SAndroid Build Coastguard Worker                list_entry(terminator->node.prev, struct ir3_instruction, node);
184*61046927SAndroid Build Coastguard Worker          }
185*61046927SAndroid Build Coastguard Worker 
186*61046927SAndroid Build Coastguard Worker          if (prev_instr && is_terminator(prev_instr)) {
187*61046927SAndroid Build Coastguard Worker             /* There are two terminating branches so both successors are
188*61046927SAndroid Build Coastguard Worker              * reconvergence points (i.e., there is no fall through into the
189*61046927SAndroid Build Coastguard Worker              * next block). This can only happen after ir3_legalize when we fail
190*61046927SAndroid Build Coastguard Worker              * to eliminate a non-invertible branch. For example:
191*61046927SAndroid Build Coastguard Worker              * getone #bb0
192*61046927SAndroid Build Coastguard Worker              * jump #bb1
193*61046927SAndroid Build Coastguard Worker              * bb0: (jp)...
194*61046927SAndroid Build Coastguard Worker              * bb1: (jp)...
195*61046927SAndroid Build Coastguard Worker              */
196*61046927SAndroid Build Coastguard Worker             reconv_points[0] = block->successors[0];
197*61046927SAndroid Build Coastguard Worker             reconv_points[1] = block->successors[1];
198*61046927SAndroid Build Coastguard Worker             num_reconv_points = 2;
199*61046927SAndroid Build Coastguard Worker          } else {
200*61046927SAndroid Build Coastguard Worker             unsigned idx =
201*61046927SAndroid Build Coastguard Worker                block->successors[0]->index > block->successors[1]->index ? 0
202*61046927SAndroid Build Coastguard Worker                                                                          : 1;
203*61046927SAndroid Build Coastguard Worker             reconv_points[0] = block->successors[idx];
204*61046927SAndroid Build Coastguard Worker             reconv_points[1] = NULL;
205*61046927SAndroid Build Coastguard Worker             num_reconv_points = 1;
206*61046927SAndroid Build Coastguard Worker          }
207*61046927SAndroid Build Coastguard Worker 
208*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < num_reconv_points; i++) {
209*61046927SAndroid Build Coastguard Worker             struct ir3_block *reconv_point = reconv_points[i];
210*61046927SAndroid Build Coastguard Worker             reconv_point->reconvergence_point = true;
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker             struct block_data *reconv_point_data = &blocks[reconv_point->index];
213*61046927SAndroid Build Coastguard Worker             if (reconv_point_data->first_divergent_pred > block->index) {
214*61046927SAndroid Build Coastguard Worker                reconv_point_data->first_divergent_pred = block->index;
215*61046927SAndroid Build Coastguard Worker             }
216*61046927SAndroid Build Coastguard Worker 
217*61046927SAndroid Build Coastguard Worker             u_worklist_push_tail(&worklist, reconv_point, index);
218*61046927SAndroid Build Coastguard Worker          }
219*61046927SAndroid Build Coastguard Worker       }
220*61046927SAndroid Build Coastguard Worker    }
221*61046927SAndroid Build Coastguard Worker 
222*61046927SAndroid Build Coastguard Worker    while (!u_worklist_is_empty(&worklist)) {
223*61046927SAndroid Build Coastguard Worker       struct ir3_block *block =
224*61046927SAndroid Build Coastguard Worker          u_worklist_pop_head(&worklist, struct ir3_block, index);
225*61046927SAndroid Build Coastguard Worker       assert(block->reconvergence_point);
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker       /* Backwards branches extend the range of divergence. For example, a
228*61046927SAndroid Build Coastguard Worker        * divergent break creates a reconvergence point after the loop that
229*61046927SAndroid Build Coastguard Worker        * stays outstanding throughout subsequent iterations, even at points
230*61046927SAndroid Build Coastguard Worker        * before the break. This takes that into account.
231*61046927SAndroid Build Coastguard Worker        *
232*61046927SAndroid Build Coastguard Worker        * More precisely, a backwards edge that originates between the block and
233*61046927SAndroid Build Coastguard Worker        * it's first_divergent_pred (i.e. in the divergence range) extends the
234*61046927SAndroid Build Coastguard Worker        * divergence range to the beginning of its destination if it is taken, or
235*61046927SAndroid Build Coastguard Worker        * alternatively to the end of the block before its destination.
236*61046927SAndroid Build Coastguard Worker        */
237*61046927SAndroid Build Coastguard Worker       struct uinterval interval2 = {
238*61046927SAndroid Build Coastguard Worker          blocks[block->index].first_divergent_pred,
239*61046927SAndroid Build Coastguard Worker          blocks[block->index].first_divergent_pred
240*61046927SAndroid Build Coastguard Worker       };
241*61046927SAndroid Build Coastguard Worker       uinterval_tree_foreach (struct logical_edge, back_edge, interval2, &backward_edges,
242*61046927SAndroid Build Coastguard Worker                               node) {
243*61046927SAndroid Build Coastguard Worker          if (back_edge->end_block->index < block->index) {
244*61046927SAndroid Build Coastguard Worker             if (blocks[block->index].first_divergent_pred >
245*61046927SAndroid Build Coastguard Worker                 back_edge->start_block->index - 1) {
246*61046927SAndroid Build Coastguard Worker                blocks[block->index].first_divergent_pred =
247*61046927SAndroid Build Coastguard Worker                   back_edge->start_block->index - 1;
248*61046927SAndroid Build Coastguard Worker             }
249*61046927SAndroid Build Coastguard Worker          }
250*61046927SAndroid Build Coastguard Worker       }
251*61046927SAndroid Build Coastguard Worker 
252*61046927SAndroid Build Coastguard Worker       /* Iterate over all edges stepping over the block. */
253*61046927SAndroid Build Coastguard Worker       struct uinterval interval = { block->index, block->index };
254*61046927SAndroid Build Coastguard Worker       struct logical_edge *prev = NULL;
255*61046927SAndroid Build Coastguard Worker       uinterval_tree_foreach (struct logical_edge, edge, interval, &forward_edges,
256*61046927SAndroid Build Coastguard Worker                               node) {
257*61046927SAndroid Build Coastguard Worker          /* If "block" definitely isn't outstanding when the branch
258*61046927SAndroid Build Coastguard Worker           * corresponding to "edge" is taken, then we don't need to park
259*61046927SAndroid Build Coastguard Worker           * "edge->end_block" and we can ignore this.
260*61046927SAndroid Build Coastguard Worker           *
261*61046927SAndroid Build Coastguard Worker           * TODO: add uinterval_tree_foreach_from() and use that instead.
262*61046927SAndroid Build Coastguard Worker           */
263*61046927SAndroid Build Coastguard Worker          if (edge->start_block->index <= blocks[block->index].first_divergent_pred)
264*61046927SAndroid Build Coastguard Worker             continue;
265*61046927SAndroid Build Coastguard Worker 
266*61046927SAndroid Build Coastguard Worker          /* If we've already processed this edge + RP pair, don't process it
267*61046927SAndroid Build Coastguard Worker           * again. Because edges are ordered by start point, we must have
268*61046927SAndroid Build Coastguard Worker           * processed every edge after this too.
269*61046927SAndroid Build Coastguard Worker           */
270*61046927SAndroid Build Coastguard Worker          if (edge->start_block->index >
271*61046927SAndroid Build Coastguard Worker              blocks[block->index].first_processed_divergent_pred)
272*61046927SAndroid Build Coastguard Worker             break;
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker          edge->end_block->reconvergence_point = true;
275*61046927SAndroid Build Coastguard Worker          if (blocks[edge->end_block->index].first_divergent_pred >
276*61046927SAndroid Build Coastguard Worker              edge->start_block->index) {
277*61046927SAndroid Build Coastguard Worker             blocks[edge->end_block->index].first_divergent_pred =
278*61046927SAndroid Build Coastguard Worker                edge->start_block->index;
279*61046927SAndroid Build Coastguard Worker             u_worklist_push_tail(&worklist, edge->end_block, index);
280*61046927SAndroid Build Coastguard Worker          }
281*61046927SAndroid Build Coastguard Worker 
282*61046927SAndroid Build Coastguard Worker          if (!prev || prev->start_block != edge->start_block) {
283*61046927SAndroid Build Coastguard Worker             /* We should only process this edge + block combination once, and
284*61046927SAndroid Build Coastguard Worker              * we use the fact that edges are sorted by start point to avoid
285*61046927SAndroid Build Coastguard Worker              * adding redundant physical edges in case multiple edges have the
286*61046927SAndroid Build Coastguard Worker              * same start point by comparing with the previous edge. Therefore
287*61046927SAndroid Build Coastguard Worker              * we should only add the physical edge once.
288*61046927SAndroid Build Coastguard Worker              * However, we should skip logical successors of the edge's start
289*61046927SAndroid Build Coastguard Worker              * block since physical edges for those have already been added
290*61046927SAndroid Build Coastguard Worker              * initially.
291*61046927SAndroid Build Coastguard Worker              */
292*61046927SAndroid Build Coastguard Worker             if (block != edge->start_block->successors[0] &&
293*61046927SAndroid Build Coastguard Worker                 block != edge->start_block->successors[1]) {
294*61046927SAndroid Build Coastguard Worker                for (unsigned i = 0; i < block->physical_predecessors_count; i++)
295*61046927SAndroid Build Coastguard Worker                   assert(block->physical_predecessors[i] != edge->start_block);
296*61046927SAndroid Build Coastguard Worker                ir3_block_link_physical(edge->start_block, block);
297*61046927SAndroid Build Coastguard Worker             }
298*61046927SAndroid Build Coastguard Worker          }
299*61046927SAndroid Build Coastguard Worker          prev = edge;
300*61046927SAndroid Build Coastguard Worker       }
301*61046927SAndroid Build Coastguard Worker 
302*61046927SAndroid Build Coastguard Worker       blocks[block->index].first_processed_divergent_pred =
303*61046927SAndroid Build Coastguard Worker          blocks[block->index].first_divergent_pred;
304*61046927SAndroid Build Coastguard Worker    }
305*61046927SAndroid Build Coastguard Worker 
306*61046927SAndroid Build Coastguard Worker    /* For each reconvergent point p we have an open range
307*61046927SAndroid Build Coastguard Worker     * (p->first_divergent_pred, p) where p may be outstanding. We need to keep
308*61046927SAndroid Build Coastguard Worker     * track of the number of outstanding RPs and calculate the maximum.
309*61046927SAndroid Build Coastguard Worker     */
310*61046927SAndroid Build Coastguard Worker    foreach_block (block, &so->ir->block_list) {
311*61046927SAndroid Build Coastguard Worker       if (block->reconvergence_point) {
312*61046927SAndroid Build Coastguard Worker          blocks[blocks[block->index].first_divergent_pred].divergence_count++;
313*61046927SAndroid Build Coastguard Worker       }
314*61046927SAndroid Build Coastguard Worker    }
315*61046927SAndroid Build Coastguard Worker 
316*61046927SAndroid Build Coastguard Worker    unsigned rc_level = 0;
317*61046927SAndroid Build Coastguard Worker    so->branchstack = 0;
318*61046927SAndroid Build Coastguard Worker    foreach_block (block, &so->ir->block_list) {
319*61046927SAndroid Build Coastguard Worker       if (block->reconvergence_point)
320*61046927SAndroid Build Coastguard Worker          rc_level--;
321*61046927SAndroid Build Coastguard Worker 
322*61046927SAndroid Build Coastguard Worker       /* Account for lowerings that produce divergent control flow. */
323*61046927SAndroid Build Coastguard Worker       foreach_instr (instr, &block->instr_list) {
324*61046927SAndroid Build Coastguard Worker          switch (instr->opc) {
325*61046927SAndroid Build Coastguard Worker          case OPC_SCAN_MACRO:
326*61046927SAndroid Build Coastguard Worker             so->branchstack = MAX2(so->branchstack, rc_level + 2);
327*61046927SAndroid Build Coastguard Worker             break;
328*61046927SAndroid Build Coastguard Worker          case OPC_BALLOT_MACRO:
329*61046927SAndroid Build Coastguard Worker          case OPC_READ_COND_MACRO:
330*61046927SAndroid Build Coastguard Worker          case OPC_ELECT_MACRO:
331*61046927SAndroid Build Coastguard Worker          case OPC_READ_FIRST_MACRO:
332*61046927SAndroid Build Coastguard Worker             so->branchstack = MAX2(so->branchstack, rc_level + 1);
333*61046927SAndroid Build Coastguard Worker             break;
334*61046927SAndroid Build Coastguard Worker          default:
335*61046927SAndroid Build Coastguard Worker             break;
336*61046927SAndroid Build Coastguard Worker          }
337*61046927SAndroid Build Coastguard Worker       }
338*61046927SAndroid Build Coastguard Worker 
339*61046927SAndroid Build Coastguard Worker       rc_level += blocks[block->index].divergence_count;
340*61046927SAndroid Build Coastguard Worker 
341*61046927SAndroid Build Coastguard Worker       so->branchstack = MAX2(so->branchstack, rc_level);
342*61046927SAndroid Build Coastguard Worker    }
343*61046927SAndroid Build Coastguard Worker    assert(rc_level == 0);
344*61046927SAndroid Build Coastguard Worker 
345*61046927SAndroid Build Coastguard Worker    ralloc_free(mem_ctx);
346*61046927SAndroid Build Coastguard Worker }
347*61046927SAndroid Build Coastguard Worker 
348