; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s

; Each kernel below performs two LDS (addrspace(3)) loads relative to the
; workitem id, adds the results, and stores the sum to global memory. The
; check lines state whether the two loads are expected to be combined into a
; single ds_read2st64 instruction.

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8


; f32 loads at element offsets 0 and 64: expected to merge, with the second
; load encoded as st64 offset 1.
; SI-LABEL: @simple_read2st64_f32_0_1
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; f32 loads at element offsets 64 and 128 from a pointer argument: expected to
; merge with st64 offsets 1 and 2.
; SI-LABEL: @simple_read2st64_f32_1_2
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 128
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; f32 loads at element offsets 64 and 16320 (= 255 * 64): still expected to
; merge, using st64 offsets 1 and 255.
; SI-LABEL: @simple_read2st64_f32_max_offset
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:255 offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 16320
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; f32 loads at element offsets 64 and 16384 (= 256 * 64, i.e. 0x10000 bytes):
; no merge is expected; the far load gets its address from an explicit add.
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}}
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 16384
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Second f32 load is not at a multiple of 64 elements: no st64 merge expected.
; SI-LABEL: @odd_invalid_read2st64_f32_0
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
; Loads f32 elements at offsets 0 and 63 from @lds; 63 is not a multiple of 64.
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 63
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; f32 loads at element offsets 64 and 127; 127 is not a multiple of 64, so no
; st64 merge is expected.
; SI-LABEL: @odd_invalid_read2st64_f32_1
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 127
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; f64 loads at element offsets 0 and 64: expected to merge into the 64-bit
; st64 form with the second load at st64 offset 1.
; SI-LABEL: @simple_read2st64_f64_0_1
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; f64 loads at element offsets 64 and 128 from a pointer argument: expected to
; merge with st64 offsets 1 and 2.
; SI-LABEL: @simple_read2st64_f64_1_2
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 128
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Alignment only
; The f64 loads are 64 elements apart but only 4-byte aligned; each one is
; expected to be read as a pair of 32-bit halves via ds_read2_b32 instead of
; being merged into a 64-bit st64 read.

; SI-LABEL: @misaligned_read2st64_f64
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 4
  ret void
}

; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
; Element offsets used here are 256 (= 4 * 64) and 8128 (= 127 * 64).
; SI-LABEL: @simple_read2st64_f64_max_offset
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:127 offset1:4
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}, v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 256
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 8128
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; f64 loads at element offsets 64 and 8192 (= 128 * 64, i.e. 0x10000 bytes):
; no merge is expected; the far load gets its address from an explicit add.
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 8192
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; f64 loads at element offsets 64 and 8129; 8129 is not a multiple of 64, so
; no st64 merge is expected.
; SI-LABEL: @invalid_read2st64_f64_odd_offset
; SI-NOT: ds_read2st64_b64
; SI: s_endpgm
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 8129
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; The stride of 8 elements is 8 * 8 bytes. We need to make sure the
; stride in elements, not bytes, is a multiple of 64.

; f64 loads 8 elements (64 bytes) apart: the byte distance is a multiple of 64
; but the element distance is not, so the plain ds_read2_b64 form with
; offset1:8 is expected. The negative check names the st64 opcode exactly as
; it is spelled elsewhere in this file (ds_read2st64_b64) so that it can
; actually match if the wrong instruction is selected.
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: ds_read2st64_b64
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 4
  ret void
}

; Workitem id intrinsics. Only the x variant is called by the kernels in this
; file; the y declaration is kept as in the original test.
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() #1

; #0 is applied to every kernel; #1 to the intrinsic declarations and calls.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }