1*9880d681SAndroid Build Coastguard Worker; RUN: opt %s -analyze -divergence | FileCheck %s 2*9880d681SAndroid Build Coastguard Worker 3*9880d681SAndroid Build Coastguard Workertarget datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" 4*9880d681SAndroid Build Coastguard Workertarget triple = "nvptx64-nvidia-cuda" 5*9880d681SAndroid Build Coastguard Worker 6*9880d681SAndroid Build Coastguard Worker; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) 7*9880d681SAndroid Build Coastguard Workerdefine i32 @no_diverge(i32 %n, i32 %a, i32 %b) { 8*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge' 9*9880d681SAndroid Build Coastguard Workerentry: 10*9880d681SAndroid Build Coastguard Worker %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 11*9880d681SAndroid Build Coastguard Worker %cond = icmp slt i32 %n, 0 12*9880d681SAndroid Build Coastguard Worker br i1 %cond, label %then, label %else ; uniform 13*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: br i1 %cond, 14*9880d681SAndroid Build Coastguard Workerthen: 15*9880d681SAndroid Build Coastguard Worker %a1 = add i32 %a, %tid 16*9880d681SAndroid Build Coastguard Worker br label %merge 17*9880d681SAndroid Build Coastguard Workerelse: 18*9880d681SAndroid Build Coastguard Worker %b2 = add i32 %b, %tid 19*9880d681SAndroid Build Coastguard Worker br label %merge 20*9880d681SAndroid Build Coastguard Workermerge: 21*9880d681SAndroid Build Coastguard Worker %c = phi i32 [ %a1, %then ], [ %b2, %else ] 22*9880d681SAndroid Build Coastguard Worker ret i32 %c 23*9880d681SAndroid Build Coastguard Worker} 24*9880d681SAndroid Build Coastguard Worker 25*9880d681SAndroid Build Coastguard Worker; c = a; 26*9880d681SAndroid Build Coastguard Worker; if (threadIdx.x < 5) // divergent: data dependent 27*9880d681SAndroid Build Coastguard Worker; c = b; 28*9880d681SAndroid Build Coastguard Worker; return c; // c is divergent: sync dependent 29*9880d681SAndroid Build Coastguard Workerdefine i32 @sync(i32 %a, i32 %b) { 30*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync' 31*9880d681SAndroid Build Coastguard Workerbb1: 32*9880d681SAndroid Build Coastguard Worker %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() 33*9880d681SAndroid Build Coastguard Worker %cond = icmp slt i32 %tid, 5 34*9880d681SAndroid Build Coastguard Worker br i1 %cond, label %bb2, label %bb3 35*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond, 36*9880d681SAndroid Build Coastguard Workerbb2: 37*9880d681SAndroid Build Coastguard Worker br label %bb3 38*9880d681SAndroid Build Coastguard Workerbb3: 39*9880d681SAndroid Build Coastguard Worker %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid 40*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: %c = 41*9880d681SAndroid Build Coastguard Worker ret i32 %c 42*9880d681SAndroid Build Coastguard Worker} 43*9880d681SAndroid Build Coastguard Worker 44*9880d681SAndroid Build Coastguard Worker; c = 0; 45*9880d681SAndroid Build Coastguard Worker; if (threadIdx.x >= 5) { // divergent 46*9880d681SAndroid Build Coastguard Worker; c = (n < 0 ? a : b); // c here is uniform because n is uniform 47*9880d681SAndroid Build Coastguard Worker; } 48*9880d681SAndroid Build Coastguard Worker; // c here is divergent because it is sync dependent on threadIdx.x >= 5 49*9880d681SAndroid Build Coastguard Worker; return c; 50*9880d681SAndroid Build Coastguard Workerdefine i32 @mixed(i32 %n, i32 %a, i32 %b) { 51*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed' 52*9880d681SAndroid Build Coastguard Workerbb1: 53*9880d681SAndroid Build Coastguard Worker %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() 54*9880d681SAndroid Build Coastguard Worker %cond = icmp slt i32 %tid, 5 55*9880d681SAndroid Build Coastguard Worker br i1 %cond, label %bb6, label %bb2 56*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond, 57*9880d681SAndroid Build Coastguard Workerbb2: 58*9880d681SAndroid Build Coastguard Worker %cond2 = icmp slt i32 %n, 0 59*9880d681SAndroid Build Coastguard Worker br i1 %cond2, label %bb4, label %bb3 60*9880d681SAndroid Build Coastguard Workerbb3: 61*9880d681SAndroid Build Coastguard Worker br label %bb5 62*9880d681SAndroid Build Coastguard Workerbb4: 63*9880d681SAndroid Build Coastguard Worker br label %bb5 64*9880d681SAndroid Build Coastguard Workerbb5: 65*9880d681SAndroid Build Coastguard Worker %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ] 66*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: %c = 67*9880d681SAndroid Build Coastguard Worker br label %bb6 68*9880d681SAndroid Build Coastguard Workerbb6: 69*9880d681SAndroid Build Coastguard Worker %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ] 70*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: %c2 = 71*9880d681SAndroid Build Coastguard Worker ret i32 %c2 72*9880d681SAndroid Build Coastguard Worker} 73*9880d681SAndroid Build Coastguard Worker 74*9880d681SAndroid Build Coastguard Worker; We conservatively treats all parameters of a __device__ function as divergent. 75*9880d681SAndroid Build Coastguard Workerdefine i32 @device(i32 %n, i32 %a, i32 %b) { 76*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device' 77*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: i32 %n 78*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: i32 %a 79*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: i32 %b 80*9880d681SAndroid Build Coastguard Workerentry: 81*9880d681SAndroid Build Coastguard Worker %cond = icmp slt i32 %n, 0 82*9880d681SAndroid Build Coastguard Worker br i1 %cond, label %then, label %else 83*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond, 84*9880d681SAndroid Build Coastguard Workerthen: 85*9880d681SAndroid Build Coastguard Worker br label %merge 86*9880d681SAndroid Build Coastguard Workerelse: 87*9880d681SAndroid Build Coastguard Worker br label %merge 88*9880d681SAndroid Build Coastguard Workermerge: 89*9880d681SAndroid Build Coastguard Worker %c = phi i32 [ %a, %then ], [ %b, %else ] 90*9880d681SAndroid Build Coastguard Worker ret i32 %c 91*9880d681SAndroid Build Coastguard Worker} 92*9880d681SAndroid Build Coastguard Worker 93*9880d681SAndroid Build Coastguard Worker; int i = 0; 94*9880d681SAndroid Build Coastguard Worker; do { 95*9880d681SAndroid Build Coastguard Worker; i++; // i here is uniform 96*9880d681SAndroid Build Coastguard Worker; } while (i < laneid); 97*9880d681SAndroid Build Coastguard Worker; return i == 10 ? 0 : 1; // i here is divergent 98*9880d681SAndroid Build Coastguard Worker; 99*9880d681SAndroid Build Coastguard Worker; The i defined in the loop is used outside. 100*9880d681SAndroid Build Coastguard Workerdefine i32 @loop() { 101*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop' 102*9880d681SAndroid Build Coastguard Workerentry: 103*9880d681SAndroid Build Coastguard Worker %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() 104*9880d681SAndroid Build Coastguard Worker br label %loop 105*9880d681SAndroid Build Coastguard Workerloop: 106*9880d681SAndroid Build Coastguard Worker %i = phi i32 [ 0, %entry ], [ %i1, %loop ] 107*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: %i = 108*9880d681SAndroid Build Coastguard Worker %i1 = add i32 %i, 1 109*9880d681SAndroid Build Coastguard Worker %exit_cond = icmp sge i32 %i1, %laneid 110*9880d681SAndroid Build Coastguard Worker br i1 %exit_cond, label %loop_exit, label %loop 111*9880d681SAndroid Build Coastguard Workerloop_exit: 112*9880d681SAndroid Build Coastguard Worker %cond = icmp eq i32 %i, 10 113*9880d681SAndroid Build Coastguard Worker br i1 %cond, label %then, label %else 114*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond, 115*9880d681SAndroid Build Coastguard Workerthen: 116*9880d681SAndroid Build Coastguard Worker ret i32 0 117*9880d681SAndroid Build Coastguard Workerelse: 118*9880d681SAndroid Build Coastguard Worker ret i32 1 119*9880d681SAndroid Build Coastguard Worker} 120*9880d681SAndroid Build Coastguard Worker 121*9880d681SAndroid Build Coastguard Worker; Same as @loop, but the loop is in the LCSSA form. 122*9880d681SAndroid Build Coastguard Workerdefine i32 @lcssa() { 123*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa' 124*9880d681SAndroid Build Coastguard Workerentry: 125*9880d681SAndroid Build Coastguard Worker %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 126*9880d681SAndroid Build Coastguard Worker br label %loop 127*9880d681SAndroid Build Coastguard Workerloop: 128*9880d681SAndroid Build Coastguard Worker %i = phi i32 [ 0, %entry ], [ %i1, %loop ] 129*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: DIVERGENT: %i = 130*9880d681SAndroid Build Coastguard Worker %i1 = add i32 %i, 1 131*9880d681SAndroid Build Coastguard Worker %exit_cond = icmp sge i32 %i1, %tid 132*9880d681SAndroid Build Coastguard Worker br i1 %exit_cond, label %loop_exit, label %loop 133*9880d681SAndroid Build Coastguard Workerloop_exit: 134*9880d681SAndroid Build Coastguard Worker %i.lcssa = phi i32 [ %i, %loop ] 135*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: %i.lcssa = 136*9880d681SAndroid Build Coastguard Worker %cond = icmp eq i32 %i.lcssa, 10 137*9880d681SAndroid Build Coastguard Worker br i1 %cond, label %then, label %else 138*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cond, 139*9880d681SAndroid Build Coastguard Workerthen: 140*9880d681SAndroid Build Coastguard Worker ret i32 0 141*9880d681SAndroid Build Coastguard Workerelse: 142*9880d681SAndroid Build Coastguard Worker ret i32 1 143*9880d681SAndroid Build Coastguard Worker} 144*9880d681SAndroid Build Coastguard Worker 145*9880d681SAndroid Build Coastguard Worker; This test contains an unstructured loop. 146*9880d681SAndroid Build Coastguard Worker; +-------------- entry ----------------+ 147*9880d681SAndroid Build Coastguard Worker; | | 148*9880d681SAndroid Build Coastguard Worker; V V 149*9880d681SAndroid Build Coastguard Worker; i1 = phi(0, i3) i2 = phi(0, i3) 150*9880d681SAndroid Build Coastguard Worker; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2 151*9880d681SAndroid Build Coastguard Worker; ^ | ^ 152*9880d681SAndroid Build Coastguard Worker; | V | 153*9880d681SAndroid Build Coastguard Worker; +-------- switch (tid / i3) ----------+ 154*9880d681SAndroid Build Coastguard Worker; | 155*9880d681SAndroid Build Coastguard Worker; V 156*9880d681SAndroid Build Coastguard Worker; if (i3 == 5) // divergent 157*9880d681SAndroid Build Coastguard Worker; because sync dependent on (tid / i3). 158*9880d681SAndroid Build Coastguard Workerdefine i32 @unstructured_loop(i1 %entry_cond) { 159*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop' 160*9880d681SAndroid Build Coastguard Workerentry: 161*9880d681SAndroid Build Coastguard Worker %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 162*9880d681SAndroid Build Coastguard Worker br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 163*9880d681SAndroid Build Coastguard Workerloop_entry_1: 164*9880d681SAndroid Build Coastguard Worker %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ] 165*9880d681SAndroid Build Coastguard Worker %j1 = add i32 %i1, 1 166*9880d681SAndroid Build Coastguard Worker br label %loop_body 167*9880d681SAndroid Build Coastguard Workerloop_entry_2: 168*9880d681SAndroid Build Coastguard Worker %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ] 169*9880d681SAndroid Build Coastguard Worker %j2 = add i32 %i2, 2 170*9880d681SAndroid Build Coastguard Worker br label %loop_body 171*9880d681SAndroid Build Coastguard Workerloop_body: 172*9880d681SAndroid Build Coastguard Worker %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ] 173*9880d681SAndroid Build Coastguard Worker br label %loop_latch 174*9880d681SAndroid Build Coastguard Workerloop_latch: 175*9880d681SAndroid Build Coastguard Worker %div = sdiv i32 %tid, %i3 176*9880d681SAndroid Build Coastguard Worker switch i32 %div, label %branch [ i32 1, label %loop_entry_1 177*9880d681SAndroid Build Coastguard Worker i32 2, label %loop_entry_2 ] 178*9880d681SAndroid Build Coastguard Workerbranch: 179*9880d681SAndroid Build Coastguard Worker %cmp = icmp eq i32 %i3, 5 180*9880d681SAndroid Build Coastguard Worker br i1 %cmp, label %then, label %else 181*9880d681SAndroid Build Coastguard Worker; CHECK: DIVERGENT: br i1 %cmp, 182*9880d681SAndroid Build Coastguard Workerthen: 183*9880d681SAndroid Build Coastguard Worker ret i32 0 184*9880d681SAndroid Build Coastguard Workerelse: 185*9880d681SAndroid Build Coastguard Worker ret i32 1 186*9880d681SAndroid Build Coastguard Worker} 187*9880d681SAndroid Build Coastguard Worker 188*9880d681SAndroid Build Coastguard Worker; Verifies sync-dependence is computed correctly in the absense of loops. 189*9880d681SAndroid Build Coastguard Workerdefine i32 @sync_no_loop(i32 %arg) { 190*9880d681SAndroid Build Coastguard Workerentry: 191*9880d681SAndroid Build Coastguard Worker %0 = add i32 %arg, 1 192*9880d681SAndroid Build Coastguard Worker %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 193*9880d681SAndroid Build Coastguard Worker %1 = icmp sge i32 %tid, 10 194*9880d681SAndroid Build Coastguard Worker br i1 %1, label %bb1, label %bb2 195*9880d681SAndroid Build Coastguard Worker 196*9880d681SAndroid Build Coastguard Workerbb1: 197*9880d681SAndroid Build Coastguard Worker br label %bb3 198*9880d681SAndroid Build Coastguard Worker 199*9880d681SAndroid Build Coastguard Workerbb2: 200*9880d681SAndroid Build Coastguard Worker br label %bb3 201*9880d681SAndroid Build Coastguard Worker 202*9880d681SAndroid Build Coastguard Workerbb3: 203*9880d681SAndroid Build Coastguard Worker %2 = add i32 %0, 2 204*9880d681SAndroid Build Coastguard Worker ; CHECK-NOT: DIVERGENT: %2 205*9880d681SAndroid Build Coastguard Worker ret i32 %2 206*9880d681SAndroid Build Coastguard Worker} 207*9880d681SAndroid Build Coastguard Worker 208*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.tid.x() 209*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.tid.y() 210*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.tid.z() 211*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.nvvm.read.ptx.sreg.laneid() 212*9880d681SAndroid Build Coastguard Worker 213*9880d681SAndroid Build Coastguard Worker!nvvm.annotations = !{!0, !1, !2, !3, !4, !5} 214*9880d681SAndroid Build Coastguard Worker!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1} 215*9880d681SAndroid Build Coastguard Worker!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1} 216*9880d681SAndroid Build Coastguard Worker!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1} 217*9880d681SAndroid Build Coastguard Worker!3 = !{i32 ()* @loop, !"kernel", i32 1} 218*9880d681SAndroid Build Coastguard Worker!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1} 219*9880d681SAndroid Build Coastguard Worker!5 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1} 220