1*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s 2*9880d681SAndroid Build Coastguard Worker 3*9880d681SAndroid Build Coastguard Worker; FIXME: We don't get cases where the address was an SGPR because we 4*9880d681SAndroid Build Coastguard Worker; get a copy to the address register for each one. 5*9880d681SAndroid Build Coastguard Worker 6*9880d681SAndroid Build Coastguard Worker@lds = addrspace(3) global [512 x float] undef, align 4 7*9880d681SAndroid Build Coastguard Worker@lds.f64 = addrspace(3) global [512 x double] undef, align 8 8*9880d681SAndroid Build Coastguard Worker 9*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32 10*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8 11*9880d681SAndroid Build Coastguard Worker; SI: s_waitcnt lgkmcnt(0) 12*9880d681SAndroid Build Coastguard Worker; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] 13*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword [[RESULT]] 14*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 15*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32(float addrspace(1)* %out) #0 { 16*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 17*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i 18*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 19*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 20*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x 21*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* 
%arrayidx1, align 4 22*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 23*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 24*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 25*9880d681SAndroid Build Coastguard Worker ret void 26*9880d681SAndroid Build Coastguard Worker} 27*9880d681SAndroid Build Coastguard Worker 28*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_max_offset 29*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255 30*9880d681SAndroid Build Coastguard Worker; SI: s_waitcnt lgkmcnt(0) 31*9880d681SAndroid Build Coastguard Worker; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] 32*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword [[RESULT]] 33*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 34*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { 35*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 36*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i 37*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 38*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 255 39*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x 40*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 4 41*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 42*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 43*9880d681SAndroid 
Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 44*9880d681SAndroid Build Coastguard Worker ret void 45*9880d681SAndroid Build Coastguard Worker} 46*9880d681SAndroid Build Coastguard Worker 47*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_too_far 48*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 49*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} 50*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 51*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 52*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { 53*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 54*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i 55*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 56*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 257 57*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x 58*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 4 59*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 60*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 61*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 62*9880d681SAndroid Build Coastguard Worker ret void 63*9880d681SAndroid Build Coastguard Worker} 64*9880d681SAndroid Build Coastguard Worker 65*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_x2 66*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, 
[[BASEADDR:v[0-9]+]] offset1:8 67*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 68*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 69*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { 70*9880d681SAndroid Build Coastguard Worker %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 71*9880d681SAndroid Build Coastguard Worker %idx.0 = add nsw i32 %tid.x, 0 72*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0 73*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 74*9880d681SAndroid Build Coastguard Worker 75*9880d681SAndroid Build Coastguard Worker %idx.1 = add nsw i32 %tid.x, 8 76*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1 77*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 4 78*9880d681SAndroid Build Coastguard Worker %sum.0 = fadd float %val0, %val1 79*9880d681SAndroid Build Coastguard Worker 80*9880d681SAndroid Build Coastguard Worker %idx.2 = add nsw i32 %tid.x, 11 81*9880d681SAndroid Build Coastguard Worker %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2 82*9880d681SAndroid Build Coastguard Worker %val2 = load float, float addrspace(3)* %arrayidx2, align 4 83*9880d681SAndroid Build Coastguard Worker 84*9880d681SAndroid Build Coastguard Worker %idx.3 = add nsw i32 %tid.x, 27 85*9880d681SAndroid Build Coastguard Worker %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3 86*9880d681SAndroid Build Coastguard Worker %val3 = load float, float addrspace(3)* %arrayidx3, align 4 87*9880d681SAndroid Build Coastguard Worker %sum.1 = fadd float 
%val2, %val3 88*9880d681SAndroid Build Coastguard Worker 89*9880d681SAndroid Build Coastguard Worker %sum = fadd float %sum.0, %sum.1 90*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0 91*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 92*9880d681SAndroid Build Coastguard Worker ret void 93*9880d681SAndroid Build Coastguard Worker} 94*9880d681SAndroid Build Coastguard Worker 95*9880d681SAndroid Build Coastguard Worker; Make sure there is an instruction between the two sets of reads. 96*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_x2_barrier 97*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 98*9880d681SAndroid Build Coastguard Worker; SI: s_barrier 99*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 100*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 101*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { 102*9880d681SAndroid Build Coastguard Worker %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 103*9880d681SAndroid Build Coastguard Worker %idx.0 = add nsw i32 %tid.x, 0 104*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0 105*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 106*9880d681SAndroid Build Coastguard Worker 107*9880d681SAndroid Build Coastguard Worker %idx.1 = add nsw i32 %tid.x, 8 108*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1 109*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 4 110*9880d681SAndroid Build 
Coastguard Worker %sum.0 = fadd float %val0, %val1 111*9880d681SAndroid Build Coastguard Worker 112*9880d681SAndroid Build Coastguard Worker call void @llvm.amdgcn.s.barrier() #2 113*9880d681SAndroid Build Coastguard Worker 114*9880d681SAndroid Build Coastguard Worker %idx.2 = add nsw i32 %tid.x, 11 115*9880d681SAndroid Build Coastguard Worker %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2 116*9880d681SAndroid Build Coastguard Worker %val2 = load float, float addrspace(3)* %arrayidx2, align 4 117*9880d681SAndroid Build Coastguard Worker 118*9880d681SAndroid Build Coastguard Worker %idx.3 = add nsw i32 %tid.x, 27 119*9880d681SAndroid Build Coastguard Worker %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3 120*9880d681SAndroid Build Coastguard Worker %val3 = load float, float addrspace(3)* %arrayidx3, align 4 121*9880d681SAndroid Build Coastguard Worker %sum.1 = fadd float %val2, %val3 122*9880d681SAndroid Build Coastguard Worker 123*9880d681SAndroid Build Coastguard Worker %sum = fadd float %sum.0, %sum.1 124*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0 125*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 126*9880d681SAndroid Build Coastguard Worker ret void 127*9880d681SAndroid Build Coastguard Worker} 128*9880d681SAndroid Build Coastguard Worker 129*9880d681SAndroid Build Coastguard Worker; For some reason adding something to the base address for the first 130*9880d681SAndroid Build Coastguard Worker; element results in only folding the inner pair. 
131*9880d681SAndroid Build Coastguard Worker 132*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_x2_nonzero_base 133*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8 134*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 135*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 136*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { 137*9880d681SAndroid Build Coastguard Worker %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 138*9880d681SAndroid Build Coastguard Worker %idx.0 = add nsw i32 %tid.x, 2 139*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0 140*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 141*9880d681SAndroid Build Coastguard Worker 142*9880d681SAndroid Build Coastguard Worker %idx.1 = add nsw i32 %tid.x, 8 143*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1 144*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 4 145*9880d681SAndroid Build Coastguard Worker %sum.0 = fadd float %val0, %val1 146*9880d681SAndroid Build Coastguard Worker 147*9880d681SAndroid Build Coastguard Worker %idx.2 = add nsw i32 %tid.x, 11 148*9880d681SAndroid Build Coastguard Worker %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2 149*9880d681SAndroid Build Coastguard Worker %val2 = load float, float addrspace(3)* %arrayidx2, align 4 150*9880d681SAndroid Build Coastguard Worker 151*9880d681SAndroid Build Coastguard Worker %idx.3 = add nsw i32 %tid.x, 27 152*9880d681SAndroid Build Coastguard Worker %arrayidx3 
= getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3 153*9880d681SAndroid Build Coastguard Worker %val3 = load float, float addrspace(3)* %arrayidx3, align 4 154*9880d681SAndroid Build Coastguard Worker %sum.1 = fadd float %val2, %val3 155*9880d681SAndroid Build Coastguard Worker 156*9880d681SAndroid Build Coastguard Worker %sum = fadd float %sum.0, %sum.1 157*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0 158*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 159*9880d681SAndroid Build Coastguard Worker ret void 160*9880d681SAndroid Build Coastguard Worker} 161*9880d681SAndroid Build Coastguard Worker 162*9880d681SAndroid Build Coastguard Worker; Be careful of vectors of pointers. We don't know if the 2 pointers 163*9880d681SAndroid Build Coastguard Worker; in the vectors are really the same base, so this is not safe to 164*9880d681SAndroid Build Coastguard Worker; merge. 165*9880d681SAndroid Build Coastguard Worker; Base pointers come from different subregister of same super 166*9880d681SAndroid Build Coastguard Worker; register. We can't safely merge this. 
167*9880d681SAndroid Build Coastguard Worker 168*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @read2_ptr_is_subreg_arg_f32 169*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 170*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 171*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 172*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 173*9880d681SAndroid Build Coastguard Workerdefine void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { 174*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 175*9880d681SAndroid Build Coastguard Worker %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 176*9880d681SAndroid Build Coastguard Worker %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0 177*9880d681SAndroid Build Coastguard Worker %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1 178*9880d681SAndroid Build Coastguard Worker %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0 179*9880d681SAndroid Build Coastguard Worker %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1 180*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %gep.0, align 4 181*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %gep.1, align 4 182*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 183*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 184*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 185*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 186*9880d681SAndroid Build Coastguard Worker ret void 187*9880d681SAndroid Build Coastguard Worker} 188*9880d681SAndroid Build Coastguard Worker 189*9880d681SAndroid Build Coastguard Worker; Apply a constant scalar offset after the 
pointer vector extract. We 190*9880d681SAndroid Build Coastguard Worker; are rejecting merges that have the same, constant 0 offset, so make 191*9880d681SAndroid Build Coastguard Worker; sure we are really rejecting it because of the different 192*9880d681SAndroid Build Coastguard Worker; subregisters. 193*9880d681SAndroid Build Coastguard Worker 194*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32 195*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 196*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 197*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 198*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 199*9880d681SAndroid Build Coastguard Workerdefine void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { 200*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 201*9880d681SAndroid Build Coastguard Worker %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 202*9880d681SAndroid Build Coastguard Worker %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0 203*9880d681SAndroid Build Coastguard Worker %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1 204*9880d681SAndroid Build Coastguard Worker %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0 205*9880d681SAndroid Build Coastguard Worker %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1 206*9880d681SAndroid Build Coastguard Worker 207*9880d681SAndroid Build Coastguard Worker ; Apply an additional offset after the vector that will be more obviously folded. 
208*9880d681SAndroid Build Coastguard Worker %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8 209*9880d681SAndroid Build Coastguard Worker 210*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %gep.0, align 4 211*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %gep.1.offset, align 4 212*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 213*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 214*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 215*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 216*9880d681SAndroid Build Coastguard Worker ret void 217*9880d681SAndroid Build Coastguard Worker} 218*9880d681SAndroid Build Coastguard Worker 219*9880d681SAndroid Build Coastguard Worker; SI-LABEL: {{^}}read2_ptr_is_subreg_f32: 220*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}} 221*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 222*9880d681SAndroid Build Coastguard Workerdefine void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { 223*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 224*9880d681SAndroid Build Coastguard Worker %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0 225*9880d681SAndroid Build Coastguard Worker %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1 226*9880d681SAndroid Build Coastguard Worker %x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 227*9880d681SAndroid Build Coastguard Worker %x.i.v.1 = insertelement <2 x i32> %x.i.v.0, i32 %x.i, i32 1 228*9880d681SAndroid Build Coastguard Worker %idx = add <2 x i32> %x.i.v.1, <i32 0, i32 8> 229*9880d681SAndroid Build Coastguard Worker 
%gep = getelementptr inbounds [512 x float], <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx 230*9880d681SAndroid Build Coastguard Worker %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0 231*9880d681SAndroid Build Coastguard Worker %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1 232*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %gep.0, align 4 233*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %gep.1, align 4 234*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 235*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 236*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 237*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 238*9880d681SAndroid Build Coastguard Worker ret void 239*9880d681SAndroid Build Coastguard Worker} 240*9880d681SAndroid Build Coastguard Worker 241*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_volatile_0 242*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 243*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} 244*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 245*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 246*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { 247*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 248*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i 249*9880d681SAndroid Build Coastguard Worker %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4 250*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 
%x.i, 8 251*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x 252*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 4 253*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 254*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 255*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 256*9880d681SAndroid Build Coastguard Worker ret void 257*9880d681SAndroid Build Coastguard Worker} 258*9880d681SAndroid Build Coastguard Worker 259*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_volatile_1 260*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 261*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} 262*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 263*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 264*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { 265*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 266*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i 267*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 4 268*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 269*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x 270*9880d681SAndroid Build Coastguard Worker %val1 = load volatile float, float addrspace(3)* %arrayidx1, align 4 271*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 272*9880d681SAndroid 
Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 273*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 274*9880d681SAndroid Build Coastguard Worker ret void 275*9880d681SAndroid Build Coastguard Worker} 276*9880d681SAndroid Build Coastguard Worker 277*9880d681SAndroid Build Coastguard Worker; Can't fold since not correctly aligned. 278*9880d681SAndroid Build Coastguard Worker; XXX: This isn't really testing anything useful now. I think CI 279*9880d681SAndroid Build Coastguard Worker; allows unaligned LDS accesses, which would be a problem here. 280*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @unaligned_read2_f32 281*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 282*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 283*9880d681SAndroid Build Coastguard Workerdefine void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { 284*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 285*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i 286*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 1 287*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 288*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x 289*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 1 290*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 291*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 292*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 293*9880d681SAndroid Build Coastguard Worker ret void 294*9880d681SAndroid Build 
Coastguard Worker} 295*9880d681SAndroid Build Coastguard Worker 296*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @misaligned_2_simple_read2_f32 297*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32 298*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 299*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { 300*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 301*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i 302*9880d681SAndroid Build Coastguard Worker %val0 = load float, float addrspace(3)* %arrayidx0, align 2 303*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 304*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x 305*9880d681SAndroid Build Coastguard Worker %val1 = load float, float addrspace(3)* %arrayidx1, align 2 306*9880d681SAndroid Build Coastguard Worker %sum = fadd float %val0, %val1 307*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i 308*9880d681SAndroid Build Coastguard Worker store float %sum, float addrspace(1)* %out.gep, align 4 309*9880d681SAndroid Build Coastguard Worker ret void 310*9880d681SAndroid Build Coastguard Worker} 311*9880d681SAndroid Build Coastguard Worker 312*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f64 313*9880d681SAndroid Build Coastguard Worker; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}} 314*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8 315*9880d681SAndroid Build Coastguard Worker; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} 316*9880d681SAndroid Build Coastguard 
Worker; SI: buffer_store_dwordx2 [[RESULT]] 317*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 318*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f64(double addrspace(1)* %out) #0 { 319*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 320*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i 321*9880d681SAndroid Build Coastguard Worker %val0 = load double, double addrspace(3)* %arrayidx0, align 8 322*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 8 323*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x 324*9880d681SAndroid Build Coastguard Worker %val1 = load double, double addrspace(3)* %arrayidx1, align 8 325*9880d681SAndroid Build Coastguard Worker %sum = fadd double %val0, %val1 326*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i 327*9880d681SAndroid Build Coastguard Worker store double %sum, double addrspace(1)* %out.gep, align 8 328*9880d681SAndroid Build Coastguard Worker ret void 329*9880d681SAndroid Build Coastguard Worker} 330*9880d681SAndroid Build Coastguard Worker 331*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f64_max_offset 332*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255 333*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 334*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { 335*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 336*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i 337*9880d681SAndroid Build 
Coastguard Worker %val0 = load double, double addrspace(3)* %arrayidx0, align 8 338*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 255 339*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x 340*9880d681SAndroid Build Coastguard Worker %val1 = load double, double addrspace(3)* %arrayidx1, align 8 341*9880d681SAndroid Build Coastguard Worker %sum = fadd double %val0, %val1 342*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i 343*9880d681SAndroid Build Coastguard Worker store double %sum, double addrspace(1)* %out.gep, align 8 344*9880d681SAndroid Build Coastguard Worker ret void 345*9880d681SAndroid Build Coastguard Worker} 346*9880d681SAndroid Build Coastguard Worker 347*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f64_too_far 348*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b64 349*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} 350*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056 351*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 352*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { 353*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 354*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i 355*9880d681SAndroid Build Coastguard Worker %val0 = load double, double addrspace(3)* %arrayidx0, align 8 356*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 257 357*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x 
358*9880d681SAndroid Build Coastguard Worker %val1 = load double, double addrspace(3)* %arrayidx1, align 8 359*9880d681SAndroid Build Coastguard Worker %sum = fadd double %val0, %val1 360*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i 361*9880d681SAndroid Build Coastguard Worker store double %sum, double addrspace(1)* %out.gep, align 8 362*9880d681SAndroid Build Coastguard Worker ret void 363*9880d681SAndroid Build Coastguard Worker} 364*9880d681SAndroid Build Coastguard Worker 365*9880d681SAndroid Build Coastguard Worker; Alignment only 4 366*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @misaligned_read2_f64 367*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 368*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 369*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 370*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { 371*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 372*9880d681SAndroid Build Coastguard Worker %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i 373*9880d681SAndroid Build Coastguard Worker %val0 = load double, double addrspace(3)* %arrayidx0, align 4 374*9880d681SAndroid Build Coastguard Worker %add.x = add nsw i32 %x.i, 7 375*9880d681SAndroid Build Coastguard Worker %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x 376*9880d681SAndroid Build Coastguard Worker %val1 = load double, double addrspace(3)* %arrayidx1, align 4 377*9880d681SAndroid Build Coastguard Worker %sum = fadd double %val0, %val1 378*9880d681SAndroid Build Coastguard Worker %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i 379*9880d681SAndroid Build Coastguard 
Worker store double %sum, double addrspace(1)* %out.gep, align 4 380*9880d681SAndroid Build Coastguard Worker ret void 381*9880d681SAndroid Build Coastguard Worker} 382*9880d681SAndroid Build Coastguard Worker 383*9880d681SAndroid Build Coastguard Worker@foo = addrspace(3) global [4 x i32] undef, align 4 384*9880d681SAndroid Build Coastguard Worker 385*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_constant_adjacent_offsets 386*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} 387*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 388*9880d681SAndroid Build Coastguard Workerdefine void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { 389*9880d681SAndroid Build Coastguard Worker %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 390*9880d681SAndroid Build Coastguard Worker %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 391*9880d681SAndroid Build Coastguard Worker %sum = add i32 %val0, %val1 392*9880d681SAndroid Build Coastguard Worker store i32 %sum, i32 addrspace(1)* %out, align 4 393*9880d681SAndroid Build Coastguard Worker ret void 394*9880d681SAndroid Build Coastguard Worker} 395*9880d681SAndroid Build Coastguard Worker 396*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_constant_disjoint_offsets 397*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} 398*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2 399*9880d681SAndroid Build Coastguard Workerdefine void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { 400*9880d681SAndroid Build Coastguard Worker %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 401*9880d681SAndroid Build 
Coastguard Worker %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 402*9880d681SAndroid Build Coastguard Worker %sum = add i32 %val0, %val1 403*9880d681SAndroid Build Coastguard Worker store i32 %sum, i32 addrspace(1)* %out, align 4 404*9880d681SAndroid Build Coastguard Worker ret void 405*9880d681SAndroid Build Coastguard Worker} 406*9880d681SAndroid Build Coastguard Worker 407*9880d681SAndroid Build Coastguard Worker@bar = addrspace(3) global [4 x i64] undef, align 4 408*9880d681SAndroid Build Coastguard Worker 409*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_misaligned64_constant_offsets 410*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} 411*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 412*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 413*9880d681SAndroid Build Coastguard Workerdefine void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { 414*9880d681SAndroid Build Coastguard Worker %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 415*9880d681SAndroid Build Coastguard Worker %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 416*9880d681SAndroid Build Coastguard Worker %sum = add i64 %val0, %val1 417*9880d681SAndroid Build Coastguard Worker store i64 %sum, i64 addrspace(1)* %out, align 8 418*9880d681SAndroid Build Coastguard Worker ret void 419*9880d681SAndroid Build Coastguard Worker} 420*9880d681SAndroid Build Coastguard Worker 421*9880d681SAndroid Build Coastguard Worker@bar.large = addrspace(3) global [4096 x i64] undef, align 4 422*9880d681SAndroid Build Coastguard Worker 423*9880d681SAndroid Build Coastguard Worker; SI-LABEL: 
@load_misaligned64_constant_large_offsets 424*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} 425*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000 426*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1 427*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1 428*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm 429*9880d681SAndroid Build Coastguard Workerdefine void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { 430*9880d681SAndroid Build Coastguard Worker %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 431*9880d681SAndroid Build Coastguard Worker %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 432*9880d681SAndroid Build Coastguard Worker %sum = add i64 %val0, %val1 433*9880d681SAndroid Build Coastguard Worker store i64 %sum, i64 addrspace(1)* %out, align 8 434*9880d681SAndroid Build Coastguard Worker ret void 435*9880d681SAndroid Build Coastguard Worker} 436*9880d681SAndroid Build Coastguard Worker 437*9880d681SAndroid Build Coastguard Worker@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4 438*9880d681SAndroid Build Coastguard Worker@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4 439*9880d681SAndroid Build Coastguard Worker 440*9880d681SAndroid Build Coastguard Workerdefine void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 { 441*9880d681SAndroid Build Coastguard Worker %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1 442*9880d681SAndroid Build Coastguard Worker %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1 443*9880d681SAndroid Build Coastguard Worker 
%arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i 444*9880d681SAndroid Build Coastguard Worker %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4 445*9880d681SAndroid Build Coastguard Worker %add47 = add nsw i32 %x.i, 1 446*9880d681SAndroid Build Coastguard Worker %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47 447*9880d681SAndroid Build Coastguard Worker %tmp17 = load float, float addrspace(3)* %arrayidx48, align 4 448*9880d681SAndroid Build Coastguard Worker %add51 = add nsw i32 %x.i, 16 449*9880d681SAndroid Build Coastguard Worker %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51 450*9880d681SAndroid Build Coastguard Worker %tmp18 = load float, float addrspace(3)* %arrayidx52, align 4 451*9880d681SAndroid Build Coastguard Worker %add55 = add nsw i32 %x.i, 17 452*9880d681SAndroid Build Coastguard Worker %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55 453*9880d681SAndroid Build Coastguard Worker %tmp19 = load float, float addrspace(3)* %arrayidx56, align 4 454*9880d681SAndroid Build Coastguard Worker %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i 455*9880d681SAndroid Build Coastguard Worker %tmp20 = load float, float addrspace(3)* %arrayidx60, align 4 456*9880d681SAndroid Build Coastguard Worker %add63 = add nsw i32 %y.i, 1 457*9880d681SAndroid Build Coastguard Worker %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63 458*9880d681SAndroid Build Coastguard Worker %tmp21 = load float, float addrspace(3)* %arrayidx64, align 4 459*9880d681SAndroid Build Coastguard Worker %add67 = add nsw i32 %y.i, 32 460*9880d681SAndroid Build Coastguard Worker %arrayidx68 = getelementptr inbounds [776 x float], [776 x 
float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67 461*9880d681SAndroid Build Coastguard Worker %tmp22 = load float, float addrspace(3)* %arrayidx68, align 4 462*9880d681SAndroid Build Coastguard Worker %add71 = add nsw i32 %y.i, 33 463*9880d681SAndroid Build Coastguard Worker %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71 464*9880d681SAndroid Build Coastguard Worker %tmp23 = load float, float addrspace(3)* %arrayidx72, align 4 465*9880d681SAndroid Build Coastguard Worker %add75 = add nsw i32 %y.i, 64 466*9880d681SAndroid Build Coastguard Worker %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75 467*9880d681SAndroid Build Coastguard Worker %tmp24 = load float, float addrspace(3)* %arrayidx76, align 4 468*9880d681SAndroid Build Coastguard Worker %add79 = add nsw i32 %y.i, 65 469*9880d681SAndroid Build Coastguard Worker %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79 470*9880d681SAndroid Build Coastguard Worker %tmp25 = load float, float addrspace(3)* %arrayidx80, align 4 471*9880d681SAndroid Build Coastguard Worker %sum.0 = fadd float %tmp16, %tmp17 472*9880d681SAndroid Build Coastguard Worker %sum.1 = fadd float %sum.0, %tmp18 473*9880d681SAndroid Build Coastguard Worker %sum.2 = fadd float %sum.1, %tmp19 474*9880d681SAndroid Build Coastguard Worker %sum.3 = fadd float %sum.2, %tmp20 475*9880d681SAndroid Build Coastguard Worker %sum.4 = fadd float %sum.3, %tmp21 476*9880d681SAndroid Build Coastguard Worker %sum.5 = fadd float %sum.4, %tmp22 477*9880d681SAndroid Build Coastguard Worker %sum.6 = fadd float %sum.5, %tmp23 478*9880d681SAndroid Build Coastguard Worker %sum.7 = fadd float %sum.6, %tmp24 479*9880d681SAndroid Build Coastguard Worker %sum.8 = fadd float %sum.7, %tmp25 480*9880d681SAndroid Build Coastguard Worker store float %sum.8, float addrspace(1)* %C, align 4 
481*9880d681SAndroid Build Coastguard Worker ret void 482*9880d681SAndroid Build Coastguard Worker} 483*9880d681SAndroid Build Coastguard Worker 484*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 { 485*9880d681SAndroid Build Coastguard Worker %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4 486*9880d681SAndroid Build Coastguard Worker store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8 487*9880d681SAndroid Build Coastguard Worker ret void 488*9880d681SAndroid Build Coastguard Worker} 489*9880d681SAndroid Build Coastguard Worker 490*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 { 491*9880d681SAndroid Build Coastguard Worker %load = load i64, i64 addrspace(3)* %in, align 4 492*9880d681SAndroid Build Coastguard Worker store i64 %load, i64 addrspace(1)* %out, align 8 493*9880d681SAndroid Build Coastguard Worker ret void 494*9880d681SAndroid Build Coastguard Worker} 495*9880d681SAndroid Build Coastguard Worker 496*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone 497*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workgroup.id.x() #1 498*9880d681SAndroid Build Coastguard Worker 499*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone 500*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workgroup.id.y() #1 501*9880d681SAndroid Build Coastguard Worker 502*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone 503*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.x() #1 504*9880d681SAndroid Build Coastguard Worker 505*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone 506*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.y() #1 507*9880d681SAndroid Build Coastguard Worker 508*9880d681SAndroid Build 
Coastguard Worker; Function Attrs: convergent nounwind 509*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.amdgcn.s.barrier() #2 510*9880d681SAndroid Build Coastguard Worker 511*9880d681SAndroid Build Coastguard Workerattributes #0 = { nounwind } 512*9880d681SAndroid Build Coastguard Workerattributes #1 = { nounwind readnone } 513*9880d681SAndroid Build Coastguard Workerattributes #2 = { convergent nounwind } 514