xref: /aosp_15_r20/external/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: opt %s -analyze -divergence | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
3*9880d681SAndroid Build Coastguard Workertarget datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
4*9880d681SAndroid Build Coastguard Workertarget triple = "nvptx64-nvidia-cuda"
5*9880d681SAndroid Build Coastguard Worker
6*9880d681SAndroid Build Coastguard Worker; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
7*9880d681SAndroid Build Coastguard Workerdefine i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
8*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge'
9*9880d681SAndroid Build Coastguard Workerentry:
10*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
11*9880d681SAndroid Build Coastguard Worker  %cond = icmp slt i32 %n, 0
12*9880d681SAndroid Build Coastguard Worker  br i1 %cond, label %then, label %else ; uniform
13*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: br i1 %cond,
14*9880d681SAndroid Build Coastguard Workerthen:
15*9880d681SAndroid Build Coastguard Worker  %a1 = add i32 %a, %tid
16*9880d681SAndroid Build Coastguard Worker  br label %merge
17*9880d681SAndroid Build Coastguard Workerelse:
18*9880d681SAndroid Build Coastguard Worker  %b2 = add i32 %b, %tid
19*9880d681SAndroid Build Coastguard Worker  br label %merge
20*9880d681SAndroid Build Coastguard Workermerge:
21*9880d681SAndroid Build Coastguard Worker  %c = phi i32 [ %a1, %then ], [ %b2, %else ]
22*9880d681SAndroid Build Coastguard Worker  ret i32 %c
23*9880d681SAndroid Build Coastguard Worker}
24*9880d681SAndroid Build Coastguard Worker
25*9880d681SAndroid Build Coastguard Worker; c = a;
26*9880d681SAndroid Build Coastguard Worker; if (threadIdx.x < 5)    // divergent: data dependent
27*9880d681SAndroid Build Coastguard Worker;   c = b;
28*9880d681SAndroid Build Coastguard Worker; return c;               // c is divergent: sync dependent
29*9880d681SAndroid Build Coastguard Workerdefine i32 @sync(i32 %a, i32 %b) {
30*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync'
31*9880d681SAndroid Build Coastguard Workerbb1:
32*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
33*9880d681SAndroid Build Coastguard Worker  %cond = icmp slt i32 %tid, 5
34*9880d681SAndroid Build Coastguard Worker  br i1 %cond, label %bb2, label %bb3
35*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond,
36*9880d681SAndroid Build Coastguard Workerbb2:
37*9880d681SAndroid Build Coastguard Worker  br label %bb3
38*9880d681SAndroid Build Coastguard Workerbb3:
39*9880d681SAndroid Build Coastguard Worker  %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
40*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: %c =
41*9880d681SAndroid Build Coastguard Worker  ret i32 %c
42*9880d681SAndroid Build Coastguard Worker}
43*9880d681SAndroid Build Coastguard Worker
44*9880d681SAndroid Build Coastguard Worker; c = 0;
45*9880d681SAndroid Build Coastguard Worker; if (threadIdx.x >= 5) {  // divergent
46*9880d681SAndroid Build Coastguard Worker;   c = (n < 0 ? a : b);  // c here is uniform because n is uniform
47*9880d681SAndroid Build Coastguard Worker; }
48*9880d681SAndroid Build Coastguard Worker; // c here is divergent because it is sync dependent on threadIdx.x >= 5
49*9880d681SAndroid Build Coastguard Worker; return c;
50*9880d681SAndroid Build Coastguard Workerdefine i32 @mixed(i32 %n, i32 %a, i32 %b) {
51*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed'
52*9880d681SAndroid Build Coastguard Workerbb1:
53*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
54*9880d681SAndroid Build Coastguard Worker  %cond = icmp slt i32 %tid, 5
55*9880d681SAndroid Build Coastguard Worker  br i1 %cond, label %bb6, label %bb2
56*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond,
57*9880d681SAndroid Build Coastguard Workerbb2:
58*9880d681SAndroid Build Coastguard Worker  %cond2 = icmp slt i32 %n, 0
59*9880d681SAndroid Build Coastguard Worker  br i1 %cond2, label %bb4, label %bb3
60*9880d681SAndroid Build Coastguard Workerbb3:
61*9880d681SAndroid Build Coastguard Worker  br label %bb5
62*9880d681SAndroid Build Coastguard Workerbb4:
63*9880d681SAndroid Build Coastguard Worker  br label %bb5
64*9880d681SAndroid Build Coastguard Workerbb5:
65*9880d681SAndroid Build Coastguard Worker  %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
66*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: %c =
67*9880d681SAndroid Build Coastguard Worker  br label %bb6
68*9880d681SAndroid Build Coastguard Workerbb6:
69*9880d681SAndroid Build Coastguard Worker  %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
70*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: %c2 =
71*9880d681SAndroid Build Coastguard Worker  ret i32 %c2
72*9880d681SAndroid Build Coastguard Worker}
73*9880d681SAndroid Build Coastguard Worker
74*9880d681SAndroid Build Coastguard Worker; We conservatively treats all parameters of a __device__ function as divergent.
75*9880d681SAndroid Build Coastguard Workerdefine i32 @device(i32 %n, i32 %a, i32 %b) {
76*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device'
77*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: i32 %n
78*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: i32 %a
79*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: i32 %b
80*9880d681SAndroid Build Coastguard Workerentry:
81*9880d681SAndroid Build Coastguard Worker  %cond = icmp slt i32 %n, 0
82*9880d681SAndroid Build Coastguard Worker  br i1 %cond, label %then, label %else
83*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond,
84*9880d681SAndroid Build Coastguard Workerthen:
85*9880d681SAndroid Build Coastguard Worker  br label %merge
86*9880d681SAndroid Build Coastguard Workerelse:
87*9880d681SAndroid Build Coastguard Worker  br label %merge
88*9880d681SAndroid Build Coastguard Workermerge:
89*9880d681SAndroid Build Coastguard Worker  %c = phi i32 [ %a, %then ], [ %b, %else ]
90*9880d681SAndroid Build Coastguard Worker  ret i32 %c
91*9880d681SAndroid Build Coastguard Worker}
92*9880d681SAndroid Build Coastguard Worker
93*9880d681SAndroid Build Coastguard Worker; int i = 0;
94*9880d681SAndroid Build Coastguard Worker; do {
95*9880d681SAndroid Build Coastguard Worker;   i++;                  // i here is uniform
96*9880d681SAndroid Build Coastguard Worker; } while (i < laneid);
97*9880d681SAndroid Build Coastguard Worker; return i == 10 ? 0 : 1; // i here is divergent
98*9880d681SAndroid Build Coastguard Worker;
99*9880d681SAndroid Build Coastguard Worker; The i defined in the loop is used outside.
100*9880d681SAndroid Build Coastguard Workerdefine i32 @loop() {
101*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
102*9880d681SAndroid Build Coastguard Workerentry:
103*9880d681SAndroid Build Coastguard Worker  %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
104*9880d681SAndroid Build Coastguard Worker  br label %loop
105*9880d681SAndroid Build Coastguard Workerloop:
106*9880d681SAndroid Build Coastguard Worker  %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
107*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: %i =
108*9880d681SAndroid Build Coastguard Worker  %i1 = add i32 %i, 1
109*9880d681SAndroid Build Coastguard Worker  %exit_cond = icmp sge i32 %i1, %laneid
110*9880d681SAndroid Build Coastguard Worker  br i1 %exit_cond, label %loop_exit, label %loop
111*9880d681SAndroid Build Coastguard Workerloop_exit:
112*9880d681SAndroid Build Coastguard Worker  %cond = icmp eq i32 %i, 10
113*9880d681SAndroid Build Coastguard Worker  br i1 %cond, label %then, label %else
114*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond,
115*9880d681SAndroid Build Coastguard Workerthen:
116*9880d681SAndroid Build Coastguard Worker  ret i32 0
117*9880d681SAndroid Build Coastguard Workerelse:
118*9880d681SAndroid Build Coastguard Worker  ret i32 1
119*9880d681SAndroid Build Coastguard Worker}
120*9880d681SAndroid Build Coastguard Worker
121*9880d681SAndroid Build Coastguard Worker; Same as @loop, but the loop is in the LCSSA form.
122*9880d681SAndroid Build Coastguard Workerdefine i32 @lcssa() {
123*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa'
124*9880d681SAndroid Build Coastguard Workerentry:
125*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
126*9880d681SAndroid Build Coastguard Worker  br label %loop
127*9880d681SAndroid Build Coastguard Workerloop:
128*9880d681SAndroid Build Coastguard Worker  %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
129*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: %i =
130*9880d681SAndroid Build Coastguard Worker  %i1 = add i32 %i, 1
131*9880d681SAndroid Build Coastguard Worker  %exit_cond = icmp sge i32 %i1, %tid
132*9880d681SAndroid Build Coastguard Worker  br i1 %exit_cond, label %loop_exit, label %loop
133*9880d681SAndroid Build Coastguard Workerloop_exit:
134*9880d681SAndroid Build Coastguard Worker  %i.lcssa = phi i32 [ %i, %loop ]
135*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: %i.lcssa =
136*9880d681SAndroid Build Coastguard Worker  %cond = icmp eq i32 %i.lcssa, 10
137*9880d681SAndroid Build Coastguard Worker  br i1 %cond, label %then, label %else
138*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond,
139*9880d681SAndroid Build Coastguard Workerthen:
140*9880d681SAndroid Build Coastguard Worker  ret i32 0
141*9880d681SAndroid Build Coastguard Workerelse:
142*9880d681SAndroid Build Coastguard Worker  ret i32 1
143*9880d681SAndroid Build Coastguard Worker}
144*9880d681SAndroid Build Coastguard Worker
145*9880d681SAndroid Build Coastguard Worker; This test contains an unstructured loop.
146*9880d681SAndroid Build Coastguard Worker;           +-------------- entry ----------------+
147*9880d681SAndroid Build Coastguard Worker;           |                                     |
148*9880d681SAndroid Build Coastguard Worker;           V                                     V
149*9880d681SAndroid Build Coastguard Worker; i1 = phi(0, i3)                            i2 = phi(0, i3)
150*9880d681SAndroid Build Coastguard Worker;     j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
151*9880d681SAndroid Build Coastguard Worker;           ^                 |                   ^
152*9880d681SAndroid Build Coastguard Worker;           |                 V                   |
153*9880d681SAndroid Build Coastguard Worker;           +-------- switch (tid / i3) ----------+
154*9880d681SAndroid Build Coastguard Worker;                             |
155*9880d681SAndroid Build Coastguard Worker;                             V
156*9880d681SAndroid Build Coastguard Worker;                        if (i3 == 5) // divergent
157*9880d681SAndroid Build Coastguard Worker; because sync dependent on (tid / i3).
158*9880d681SAndroid Build Coastguard Workerdefine i32 @unstructured_loop(i1 %entry_cond) {
159*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop'
160*9880d681SAndroid Build Coastguard Workerentry:
161*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
162*9880d681SAndroid Build Coastguard Worker  br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
163*9880d681SAndroid Build Coastguard Workerloop_entry_1:
164*9880d681SAndroid Build Coastguard Worker  %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
165*9880d681SAndroid Build Coastguard Worker  %j1 = add i32 %i1, 1
166*9880d681SAndroid Build Coastguard Worker  br label %loop_body
167*9880d681SAndroid Build Coastguard Workerloop_entry_2:
168*9880d681SAndroid Build Coastguard Worker  %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
169*9880d681SAndroid Build Coastguard Worker  %j2 = add i32 %i2, 2
170*9880d681SAndroid Build Coastguard Worker  br label %loop_body
171*9880d681SAndroid Build Coastguard Workerloop_body:
172*9880d681SAndroid Build Coastguard Worker  %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
173*9880d681SAndroid Build Coastguard Worker  br label %loop_latch
174*9880d681SAndroid Build Coastguard Workerloop_latch:
175*9880d681SAndroid Build Coastguard Worker  %div = sdiv i32 %tid, %i3
176*9880d681SAndroid Build Coastguard Worker  switch i32 %div, label %branch [ i32 1, label %loop_entry_1
177*9880d681SAndroid Build Coastguard Worker                                   i32 2, label %loop_entry_2 ]
178*9880d681SAndroid Build Coastguard Workerbranch:
179*9880d681SAndroid Build Coastguard Worker  %cmp = icmp eq i32 %i3, 5
180*9880d681SAndroid Build Coastguard Worker  br i1 %cmp, label %then, label %else
181*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cmp,
182*9880d681SAndroid Build Coastguard Workerthen:
183*9880d681SAndroid Build Coastguard Worker  ret i32 0
184*9880d681SAndroid Build Coastguard Workerelse:
185*9880d681SAndroid Build Coastguard Worker  ret i32 1
186*9880d681SAndroid Build Coastguard Worker}
187*9880d681SAndroid Build Coastguard Worker
188*9880d681SAndroid Build Coastguard Worker; Verifies sync-dependence is computed correctly in the absense of loops.
189*9880d681SAndroid Build Coastguard Workerdefine i32 @sync_no_loop(i32 %arg) {
190*9880d681SAndroid Build Coastguard Workerentry:
191*9880d681SAndroid Build Coastguard Worker  %0 = add i32 %arg, 1
192*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
193*9880d681SAndroid Build Coastguard Worker  %1 = icmp sge i32 %tid, 10
194*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %bb1, label %bb2
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Workerbb1:
197*9880d681SAndroid Build Coastguard Worker  br label %bb3
198*9880d681SAndroid Build Coastguard Worker
199*9880d681SAndroid Build Coastguard Workerbb2:
200*9880d681SAndroid Build Coastguard Worker  br label %bb3
201*9880d681SAndroid Build Coastguard Worker
202*9880d681SAndroid Build Coastguard Workerbb3:
203*9880d681SAndroid Build Coastguard Worker  %2 = add i32 %0, 2
204*9880d681SAndroid Build Coastguard Worker  ; CHECK-NOT: DIVERGENT: %2
205*9880d681SAndroid Build Coastguard Worker  ret i32 %2
206*9880d681SAndroid Build Coastguard Worker}
207*9880d681SAndroid Build Coastguard Worker
208*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
209*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
210*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
211*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.laneid()
212*9880d681SAndroid Build Coastguard Worker
213*9880d681SAndroid Build Coastguard Worker!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
214*9880d681SAndroid Build Coastguard Worker!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
215*9880d681SAndroid Build Coastguard Worker!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
216*9880d681SAndroid Build Coastguard Worker!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
217*9880d681SAndroid Build Coastguard Worker!3 = !{i32 ()* @loop, !"kernel", i32 1}
218*9880d681SAndroid Build Coastguard Worker!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
219*9880d681SAndroid Build Coastguard Worker!5 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1}
220