xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/ds_read2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s
2*9880d681SAndroid Build Coastguard Worker
3*9880d681SAndroid Build Coastguard Worker; FIXME: We don't get cases where the address was an SGPR because we
4*9880d681SAndroid Build Coastguard Worker; get a copy to the address register for each one.
5*9880d681SAndroid Build Coastguard Worker
; Arrays read by the test functions in this file. Both live in address space 3
; and are left undef-initialized: 512 floats (4-byte aligned) and 512 doubles
; (8-byte aligned).
6*9880d681SAndroid Build Coastguard Worker@lds = addrspace(3) global [512 x float] undef, align 4
7*9880d681SAndroid Build Coastguard Worker@lds.f64 = addrspace(3) global [512 x double] undef, align 8
8*9880d681SAndroid Build Coastguard Worker
9*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32
10*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
11*9880d681SAndroid Build Coastguard Worker; SI: s_waitcnt lgkmcnt(0)
12*9880d681SAndroid Build Coastguard Worker; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
13*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword [[RESULT]]
14*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: with x = the value returned by @llvm.amdgcn.workitem.id.x, load
; the floats @lds[x] and @lds[x + 8] (both align 4), add them, and store the
; sum to %out[x].
15*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32(float addrspace(1)* %out) #0 {
16*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
17*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
18*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; The second element sits 8 floats after the first one.
19*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
20*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
21*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
22*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
23*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
24*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
25*9880d681SAndroid Build Coastguard Worker  ret void
26*9880d681SAndroid Build Coastguard Worker}
27*9880d681SAndroid Build Coastguard Worker
28*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_max_offset
29*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
30*9880d681SAndroid Build Coastguard Worker; SI: s_waitcnt lgkmcnt(0)
31*9880d681SAndroid Build Coastguard Worker; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
32*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword [[RESULT]]
33*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: same shape as the basic two-load case, but the second float is
; @lds[x + 255]. The expected output listed above this function still contains
; a single ds_read2_b32, with offset1 equal to 255.
34*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
35*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
36*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
37*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; Element distance between the two loads is 255.
38*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 255
39*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
40*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
41*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
42*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
43*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
44*9880d681SAndroid Build Coastguard Worker  ret void
45*9880d681SAndroid Build Coastguard Worker}
46*9880d681SAndroid Build Coastguard Worker
47*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_too_far
48*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
49*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
50*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
51*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: loads the floats @lds[x] and @lds[x + 257], adds them, and stores
; the sum to %out[x]. The expected output listed above this function is two
; separate ds_read_b32 instructions, the second with a byte offset of 1028
; (257 * 4).
52*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
53*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
54*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
55*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; Element distance between the two loads is 257.
56*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 257
57*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
58*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
59*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
60*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
61*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
62*9880d681SAndroid Build Coastguard Worker  ret void
63*9880d681SAndroid Build Coastguard Worker}
64*9880d681SAndroid Build Coastguard Worker
65*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_x2
66*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
67*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
68*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: four float loads from @lds at element indices tid + 0, tid + 8,
; tid + 11 and tid + 27. The first two and the last two are each summed, the
; two partial sums are added, and the result is stored to %out[tid + 0].
69*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
70*9880d681SAndroid Build Coastguard Worker  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; First pair: indices tid + 0 and tid + 8.
71*9880d681SAndroid Build Coastguard Worker  %idx.0 = add nsw i32 %tid.x, 0
72*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
73*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
74*9880d681SAndroid Build Coastguard Worker
75*9880d681SAndroid Build Coastguard Worker  %idx.1 = add nsw i32 %tid.x, 8
76*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
77*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
78*9880d681SAndroid Build Coastguard Worker  %sum.0 = fadd float %val0, %val1
79*9880d681SAndroid Build Coastguard Worker
  ; Second pair: indices tid + 11 and tid + 27.
80*9880d681SAndroid Build Coastguard Worker  %idx.2 = add nsw i32 %tid.x, 11
81*9880d681SAndroid Build Coastguard Worker  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
82*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
83*9880d681SAndroid Build Coastguard Worker
84*9880d681SAndroid Build Coastguard Worker  %idx.3 = add nsw i32 %tid.x, 27
85*9880d681SAndroid Build Coastguard Worker  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
86*9880d681SAndroid Build Coastguard Worker  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
87*9880d681SAndroid Build Coastguard Worker  %sum.1 = fadd float %val2, %val3
88*9880d681SAndroid Build Coastguard Worker
89*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %sum.0, %sum.1
90*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
91*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
92*9880d681SAndroid Build Coastguard Worker  ret void
93*9880d681SAndroid Build Coastguard Worker}
94*9880d681SAndroid Build Coastguard Worker
95*9880d681SAndroid Build Coastguard Worker; Make sure there is an instruction between the two sets of reads.
96*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_x2_barrier
97*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
98*9880d681SAndroid Build Coastguard Worker; SI: s_barrier
99*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
100*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: the same four loads as the x2 case (indices tid + 0, tid + 8,
; tid + 11, tid + 27), but with a call to @llvm.amdgcn.s.barrier placed between
; the first pair of loads and the second pair. The final sum is stored to
; %out[tid + 0].
101*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
102*9880d681SAndroid Build Coastguard Worker  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
103*9880d681SAndroid Build Coastguard Worker  %idx.0 = add nsw i32 %tid.x, 0
104*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
105*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
106*9880d681SAndroid Build Coastguard Worker
107*9880d681SAndroid Build Coastguard Worker  %idx.1 = add nsw i32 %tid.x, 8
108*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
109*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
110*9880d681SAndroid Build Coastguard Worker  %sum.0 = fadd float %val0, %val1
111*9880d681SAndroid Build Coastguard Worker
  ; Separates the two pairs of loads; the expected output above requires an
  ; s_barrier between the two ds_read2_b32 instructions.
112*9880d681SAndroid Build Coastguard Worker  call void @llvm.amdgcn.s.barrier() #2
113*9880d681SAndroid Build Coastguard Worker
114*9880d681SAndroid Build Coastguard Worker  %idx.2 = add nsw i32 %tid.x, 11
115*9880d681SAndroid Build Coastguard Worker  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
116*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
117*9880d681SAndroid Build Coastguard Worker
118*9880d681SAndroid Build Coastguard Worker  %idx.3 = add nsw i32 %tid.x, 27
119*9880d681SAndroid Build Coastguard Worker  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
120*9880d681SAndroid Build Coastguard Worker  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
121*9880d681SAndroid Build Coastguard Worker  %sum.1 = fadd float %val2, %val3
122*9880d681SAndroid Build Coastguard Worker
123*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %sum.0, %sum.1
124*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
125*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
126*9880d681SAndroid Build Coastguard Worker  ret void
127*9880d681SAndroid Build Coastguard Worker}
128*9880d681SAndroid Build Coastguard Worker
129*9880d681SAndroid Build Coastguard Worker; For some reason adding something to the base address for the first
130*9880d681SAndroid Build Coastguard Worker; element results in only folding the inner pair.
131*9880d681SAndroid Build Coastguard Worker
132*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_x2_nonzero_base
133*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
134*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
135*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: four float loads from @lds at element indices tid + 2, tid + 8,
; tid + 11 and tid + 27, so no load uses the bare tid as its index. The pairs
; are summed as in the x2 case and the result is stored to %out[tid + 2].
136*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
137*9880d681SAndroid Build Coastguard Worker  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; The first index is tid + 2 rather than tid + 0.
138*9880d681SAndroid Build Coastguard Worker  %idx.0 = add nsw i32 %tid.x, 2
139*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
140*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
141*9880d681SAndroid Build Coastguard Worker
142*9880d681SAndroid Build Coastguard Worker  %idx.1 = add nsw i32 %tid.x, 8
143*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
144*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
145*9880d681SAndroid Build Coastguard Worker  %sum.0 = fadd float %val0, %val1
146*9880d681SAndroid Build Coastguard Worker
147*9880d681SAndroid Build Coastguard Worker  %idx.2 = add nsw i32 %tid.x, 11
148*9880d681SAndroid Build Coastguard Worker  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
149*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
150*9880d681SAndroid Build Coastguard Worker
151*9880d681SAndroid Build Coastguard Worker  %idx.3 = add nsw i32 %tid.x, 27
152*9880d681SAndroid Build Coastguard Worker  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
153*9880d681SAndroid Build Coastguard Worker  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
154*9880d681SAndroid Build Coastguard Worker  %sum.1 = fadd float %val2, %val3
155*9880d681SAndroid Build Coastguard Worker
156*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %sum.0, %sum.1
157*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
158*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
159*9880d681SAndroid Build Coastguard Worker  ret void
160*9880d681SAndroid Build Coastguard Worker}
161*9880d681SAndroid Build Coastguard Worker
162*9880d681SAndroid Build Coastguard Worker; Be careful of vectors of pointers. We don't know if the 2 pointers
163*9880d681SAndroid Build Coastguard Worker; in the vectors are really the same base, so this is not safe to
164*9880d681SAndroid Build Coastguard Worker; merge.
165*9880d681SAndroid Build Coastguard Worker; Base pointers come from different subregister of same super
166*9880d681SAndroid Build Coastguard Worker; register. We can't safely merge this.
167*9880d681SAndroid Build Coastguard Worker
168*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @read2_ptr_is_subreg_arg_f32
169*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
170*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32
171*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32
172*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: the two load addresses are the two lanes of a vector GEP over the
; <2 x float addrspace(3)*> argument %lds.ptr. Each lane is extracted, loaded
; as a float (align 4), and the sum of the two values is stored to %out[x].
173*9880d681SAndroid Build Coastguard Workerdefine void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
174*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; NOTE(review): both insertelement instructions write lane 0, so the constant
  ; 8 replaces %x.i and lane 1 of the index vector stays undef. Confirm whether
  ; the second insert was meant to target lane 1.
175*9880d681SAndroid Build Coastguard Worker  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
176*9880d681SAndroid Build Coastguard Worker  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
177*9880d681SAndroid Build Coastguard Worker  %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
178*9880d681SAndroid Build Coastguard Worker  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
179*9880d681SAndroid Build Coastguard Worker  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
180*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %gep.0, align 4
181*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %gep.1, align 4
  ; %add.x is not referenced by any later instruction in this function.
182*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
183*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
184*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
185*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
186*9880d681SAndroid Build Coastguard Worker  ret void
187*9880d681SAndroid Build Coastguard Worker}
188*9880d681SAndroid Build Coastguard Worker
189*9880d681SAndroid Build Coastguard Worker; Apply a constant scalar offset after the pointer vector extract.  We
190*9880d681SAndroid Build Coastguard Worker; are rejecting merges that have the same, constant 0 offset, so make
191*9880d681SAndroid Build Coastguard Worker; sure we are really rejecting it because of the different
192*9880d681SAndroid Build Coastguard Worker; subregisters.
193*9880d681SAndroid Build Coastguard Worker
194*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32
195*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
196*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32
197*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32
198*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: like the plain vector-of-pointers argument case, the two load
; addresses come from the two lanes of a vector GEP over %lds.ptr, but lane 1
; is advanced by a further 8 floats with a scalar GEP before it is loaded. The
; sum of the two loaded floats is stored to %out[x].
199*9880d681SAndroid Build Coastguard Workerdefine void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
200*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; NOTE(review): both insertelement instructions write lane 0, so the constant
  ; 8 replaces %x.i and lane 1 of the index vector stays undef. Confirm whether
  ; the second insert was meant to target lane 1.
201*9880d681SAndroid Build Coastguard Worker  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
202*9880d681SAndroid Build Coastguard Worker  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
203*9880d681SAndroid Build Coastguard Worker  %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
204*9880d681SAndroid Build Coastguard Worker  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
205*9880d681SAndroid Build Coastguard Worker  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
206*9880d681SAndroid Build Coastguard Worker
207*9880d681SAndroid Build Coastguard Worker  ; Apply an additional offset after the vector that will be more obviously folded.
208*9880d681SAndroid Build Coastguard Worker  %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
209*9880d681SAndroid Build Coastguard Worker
210*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %gep.0, align 4
211*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %gep.1.offset, align 4
  ; %add.x is not referenced by any later instruction in this function.
212*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
213*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
214*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
215*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
216*9880d681SAndroid Build Coastguard Worker  ret void
217*9880d681SAndroid Build Coastguard Worker}
218*9880d681SAndroid Build Coastguard Worker
219*9880d681SAndroid Build Coastguard Worker; SI-LABEL: {{^}}read2_ptr_is_subreg_f32:
220*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}}
221*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: builds a two-lane pointer vector whose lanes are both @lds and a
; two-lane index vector <x, x> + <0, 8>, then uses one vector GEP to form both
; addresses. Because both lanes share the @lds base, the expected output above
; is a single ds_read2_b32 with offset1 equal to 8. The sum of the two loaded
; floats is stored to %out[x].
222*9880d681SAndroid Build Coastguard Workerdefine void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
223*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; Both pointer lanes are set to the same global, @lds.
224*9880d681SAndroid Build Coastguard Worker  %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
225*9880d681SAndroid Build Coastguard Worker  %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1
  ; Splat x into both lanes, then add the per-lane constants 0 and 8.
226*9880d681SAndroid Build Coastguard Worker  %x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
227*9880d681SAndroid Build Coastguard Worker  %x.i.v.1 = insertelement <2 x i32> %x.i.v.0, i32 %x.i, i32 1
228*9880d681SAndroid Build Coastguard Worker  %idx = add <2 x i32> %x.i.v.1, <i32 0, i32 8>
229*9880d681SAndroid Build Coastguard Worker  %gep = getelementptr inbounds [512 x float], <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
230*9880d681SAndroid Build Coastguard Worker  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
231*9880d681SAndroid Build Coastguard Worker  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
232*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %gep.0, align 4
233*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %gep.1, align 4
  ; %add.x is not referenced by any later instruction in this function.
234*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
235*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
236*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
237*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
238*9880d681SAndroid Build Coastguard Worker  ret void
239*9880d681SAndroid Build Coastguard Worker}
240*9880d681SAndroid Build Coastguard Worker
241*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_volatile_0
242*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
243*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
244*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
245*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: loads @lds[x] and @lds[x + 8] as in the basic two-load case, but
; the FIRST load is volatile. The expected output listed above this function
; is two separate ds_read_b32 instructions. The sum is stored to %out[x].
246*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
247*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
248*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  ; Volatile load: this is the access that distinguishes this test.
249*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
250*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
251*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
252*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
253*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
254*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
255*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
256*9880d681SAndroid Build Coastguard Worker  ret void
257*9880d681SAndroid Build Coastguard Worker}
258*9880d681SAndroid Build Coastguard Worker
259*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f32_volatile_1
260*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
261*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
262*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
263*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: loads @lds[x] and @lds[x + 8] as in the basic two-load case, but
; the SECOND load is volatile. The expected output listed above this function
; is two separate ds_read_b32 instructions. The sum is stored to %out[x].
264*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
265*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
266*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
267*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
268*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
269*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  ; Volatile load: this is the access that distinguishes this test.
270*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile float, float addrspace(3)* %arrayidx1, align 4
271*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
272*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
273*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
274*9880d681SAndroid Build Coastguard Worker  ret void
275*9880d681SAndroid Build Coastguard Worker}
276*9880d681SAndroid Build Coastguard Worker
277*9880d681SAndroid Build Coastguard Worker; Can't fold since not correctly aligned.
278*9880d681SAndroid Build Coastguard Worker; XXX: This isn't really testing anything useful now. I think CI
279*9880d681SAndroid Build Coastguard Worker; allows unaligned LDS accesses, which would be a problem here.
280*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @unaligned_read2_f32
281*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
282*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: loads two floats through the pointer argument %lds at element
; indices x and x + 8, with both loads marked align 1, then stores their sum to
; %out[x] (align 4).
283*9880d681SAndroid Build Coastguard Workerdefine void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
284*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
285*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
  ; Only byte alignment is declared for this 4-byte load.
286*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 1
287*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
288*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
289*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 1
290*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
291*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
292*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
293*9880d681SAndroid Build Coastguard Worker  ret void
294*9880d681SAndroid Build Coastguard Worker}
295*9880d681SAndroid Build Coastguard Worker
296*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @misaligned_2_simple_read2_f32
297*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b32
298*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: loads two floats through the pointer argument %lds at element
; indices x and x + 8, with both loads marked align 2, then stores their sum to
; %out[x] (align 4).
299*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
300*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
301*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
  ; Only 2-byte alignment is declared for this 4-byte load.
302*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(3)* %arrayidx0, align 2
303*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
304*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
305*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(3)* %arrayidx1, align 2
306*9880d681SAndroid Build Coastguard Worker  %sum = fadd float %val0, %val1
307*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
308*9880d681SAndroid Build Coastguard Worker  store float %sum, float addrspace(1)* %out.gep, align 4
309*9880d681SAndroid Build Coastguard Worker  ret void
310*9880d681SAndroid Build Coastguard Worker}
311*9880d681SAndroid Build Coastguard Worker
312*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f64
313*9880d681SAndroid Build Coastguard Worker; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
314*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
315*9880d681SAndroid Build Coastguard Worker; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
316*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dwordx2 [[RESULT]]
317*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: the double-precision counterpart of the basic two-load case.
; Loads the doubles @lds.f64[x] and @lds.f64[x + 8] (both align 8), adds them,
; and stores the sum to %out[x] (align 8).
318*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f64(double addrspace(1)* %out) #0 {
319*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
320*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
321*9880d681SAndroid Build Coastguard Worker  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; The second element sits 8 doubles after the first one.
322*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
323*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
324*9880d681SAndroid Build Coastguard Worker  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
325*9880d681SAndroid Build Coastguard Worker  %sum = fadd double %val0, %val1
326*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
327*9880d681SAndroid Build Coastguard Worker  store double %sum, double addrspace(1)* %out.gep, align 8
328*9880d681SAndroid Build Coastguard Worker  ret void
329*9880d681SAndroid Build Coastguard Worker}
330*9880d681SAndroid Build Coastguard Worker
331*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f64_max_offset
332*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
333*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Test input: loads the doubles @lds.f64[x] and @lds.f64[x + 255], adds them,
; and stores the sum to %out[x]. The expected output listed above this function
; is a single ds_read2_b64 with offset1 equal to 255.
334*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
335*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
336*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
337*9880d681SAndroid Build Coastguard Worker  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  ; Element distance between the two loads is 255.
338*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 255
339*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
340*9880d681SAndroid Build Coastguard Worker  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
341*9880d681SAndroid Build Coastguard Worker  %sum = fadd double %val0, %val1
342*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
343*9880d681SAndroid Build Coastguard Worker  store double %sum, double addrspace(1)* %out.gep, align 8
344*9880d681SAndroid Build Coastguard Worker  ret void
345*9880d681SAndroid Build Coastguard Worker}
346*9880d681SAndroid Build Coastguard Worker
347*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_read2_f64_too_far
348*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_read2_b64
349*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
350*9880d681SAndroid Build Coastguard Worker; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
351*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
352*9880d681SAndroid Build Coastguard Workerdefine void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
353*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
354*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
355*9880d681SAndroid Build Coastguard Worker  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
356*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 257
357*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
358*9880d681SAndroid Build Coastguard Worker  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
359*9880d681SAndroid Build Coastguard Worker  %sum = fadd double %val0, %val1
360*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
361*9880d681SAndroid Build Coastguard Worker  store double %sum, double addrspace(1)* %out.gep, align 8
362*9880d681SAndroid Build Coastguard Worker  ret void
363*9880d681SAndroid Build Coastguard Worker}
364*9880d681SAndroid Build Coastguard Worker
; Loads two doubles through the %lds pointer argument at element indices %x.i
; and %x.i + 7, both with only 4-byte alignment, adds them and stores the sum
; to %out[%x.i]. The expectations require each 64-bit load to be emitted as a
; ds_read2_b32 pair of adjacent dwords; offset0:14/offset1:15 are the two
; dwords of element 7 (7 * 8 bytes = dword index 14).
365*9880d681SAndroid Build Coastguard Worker; Alignment only 4
366*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @misaligned_read2_f64
367*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
368*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
369*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
370*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
371*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
372*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
373*9880d681SAndroid Build Coastguard Worker  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
374*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 7
375*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
376*9880d681SAndroid Build Coastguard Worker  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
377*9880d681SAndroid Build Coastguard Worker  %sum = fadd double %val0, %val1
378*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
379*9880d681SAndroid Build Coastguard Worker  store double %sum, double addrspace(1)* %out.gep, align 4
380*9880d681SAndroid Build Coastguard Worker  ret void
381*9880d681SAndroid Build Coastguard Worker}
382*9880d681SAndroid Build Coastguard Worker
; Four-element i32 array in addrspace(3), 4-byte aligned; read at constant
; indices by the two load_constant_* tests that follow.
383*9880d681SAndroid Build Coastguard Worker@foo = addrspace(3) global [4 x i32] undef, align 4
384*9880d681SAndroid Build Coastguard Worker
; Loads @foo[0] and @foo[1] through constant GEP expressions, adds them and
; stores the sum to %out. The expectations require a base register
; materialized as 0 and a single ds_read2_b32 with offset1:1.
385*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_constant_adjacent_offsets
386*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
387*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
388*9880d681SAndroid Build Coastguard Workerdefine void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
389*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
390*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
391*9880d681SAndroid Build Coastguard Worker  %sum = add i32 %val0, %val1
392*9880d681SAndroid Build Coastguard Worker  store i32 %sum, i32 addrspace(1)* %out, align 4
393*9880d681SAndroid Build Coastguard Worker  ret void
394*9880d681SAndroid Build Coastguard Worker}
395*9880d681SAndroid Build Coastguard Worker
; Loads @foo[0] and @foo[2] (non-adjacent elements) through constant GEP
; expressions, adds them and stores the sum to %out. The expectations require
; a base register materialized as 0 and a single ds_read2_b32 with offset1:2.
396*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_constant_disjoint_offsets
397*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
398*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
399*9880d681SAndroid Build Coastguard Workerdefine void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
400*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
401*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
402*9880d681SAndroid Build Coastguard Worker  %sum = add i32 %val0, %val1
403*9880d681SAndroid Build Coastguard Worker  store i32 %sum, i32 addrspace(1)* %out, align 4
404*9880d681SAndroid Build Coastguard Worker  ret void
405*9880d681SAndroid Build Coastguard Worker}
406*9880d681SAndroid Build Coastguard Worker
; Four-element i64 array in addrspace(3) that is only 4-byte aligned.
407*9880d681SAndroid Build Coastguard Worker@bar = addrspace(3) global [4 x i64] undef, align 4
408*9880d681SAndroid Build Coastguard Worker
; Loads @bar[0] and @bar[1] as i64 with 4-byte alignment, adds them and stores
; the sum to %out. The expectations require both 64-bit loads to be emitted as
; ds_read2_b32 dword pairs off a single zero base register: dwords 0/1 for
; element 0 and dwords 2/3 for element 1.
409*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_misaligned64_constant_offsets
410*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
411*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
412*9880d681SAndroid Build Coastguard Worker; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
413*9880d681SAndroid Build Coastguard Workerdefine void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
414*9880d681SAndroid Build Coastguard Worker  %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
415*9880d681SAndroid Build Coastguard Worker  %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
416*9880d681SAndroid Build Coastguard Worker  %sum = add i64 %val0, %val1
417*9880d681SAndroid Build Coastguard Worker  store i64 %sum, i64 addrspace(1)* %out, align 8
418*9880d681SAndroid Build Coastguard Worker  ret void
419*9880d681SAndroid Build Coastguard Worker}
420*9880d681SAndroid Build Coastguard Worker
; 4096-element i64 array in addrspace(3) that is only 4-byte aligned.
421*9880d681SAndroid Build Coastguard Worker@bar.large = addrspace(3) global [4096 x i64] undef, align 4
422*9880d681SAndroid Build Coastguard Worker
; Loads @bar.large[2048] and @bar.large[4095] as i64 with 4-byte alignment,
; adds them and stores the sum to %out. The expectations require each load to
; get its own base register holding the element's byte address
; (0x4000 = 2048 * 8, 0x7ff8 = 4095 * 8) followed by a ds_read2_b32 with
; offset1:1. The DAG checks may match in any order. Both immediates are
; anchored with {{$}} so a longer literal such as 0x40000 cannot satisfy them.
423*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @load_misaligned64_constant_large_offsets
424*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
425*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
426*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
427*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
428*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
429*9880d681SAndroid Build Coastguard Workerdefine void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
430*9880d681SAndroid Build Coastguard Worker  %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
431*9880d681SAndroid Build Coastguard Worker  %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
432*9880d681SAndroid Build Coastguard Worker  %sum = add i64 %val0, %val1
433*9880d681SAndroid Build Coastguard Worker  store i64 %sum, i64 addrspace(1)* %out, align 8
434*9880d681SAndroid Build Coastguard Worker  ret void
435*9880d681SAndroid Build Coastguard Worker}
436*9880d681SAndroid Build Coastguard Worker
; Two float arrays in addrspace(3) (264 and 776 elements, 4-byte aligned) read
; by @sgemm_inner_loop_read2_sequence.
437*9880d681SAndroid Build Coastguard Worker@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
438*9880d681SAndroid Build Coastguard Worker@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
439*9880d681SAndroid Build Coastguard Worker
; Reads four floats from @sgemm.lA at indices x, x+1, x+16, x+17 and six floats
; from @sgemm.lB at indices y, y+1, y+32, y+33, y+64, y+65, accumulates them
; with a chain of fadds in load order, and stores the total to %C.
; %lda and %ldb are not used by the body.
; No FileCheck expectations are attached to this function here; it is only
; compiled by the llc invocation in the file header (which passes
; -verify-machineinstrs).
; NOTE(review): %x.i comes from the workgroup id, unlike the workitem id used
; by the other tests visible in this part of the file -- confirm intentional.
440*9880d681SAndroid Build Coastguard Workerdefine void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
441*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
442*9880d681SAndroid Build Coastguard Worker  %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
443*9880d681SAndroid Build Coastguard Worker  %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
444*9880d681SAndroid Build Coastguard Worker  %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
445*9880d681SAndroid Build Coastguard Worker  %add47 = add nsw i32 %x.i, 1
446*9880d681SAndroid Build Coastguard Worker  %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
447*9880d681SAndroid Build Coastguard Worker  %tmp17 = load float, float addrspace(3)* %arrayidx48, align 4
448*9880d681SAndroid Build Coastguard Worker  %add51 = add nsw i32 %x.i, 16
449*9880d681SAndroid Build Coastguard Worker  %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
450*9880d681SAndroid Build Coastguard Worker  %tmp18 = load float, float addrspace(3)* %arrayidx52, align 4
451*9880d681SAndroid Build Coastguard Worker  %add55 = add nsw i32 %x.i, 17
452*9880d681SAndroid Build Coastguard Worker  %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
453*9880d681SAndroid Build Coastguard Worker  %tmp19 = load float, float addrspace(3)* %arrayidx56, align 4
454*9880d681SAndroid Build Coastguard Worker  %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
455*9880d681SAndroid Build Coastguard Worker  %tmp20 = load float, float addrspace(3)* %arrayidx60, align 4
456*9880d681SAndroid Build Coastguard Worker  %add63 = add nsw i32 %y.i, 1
457*9880d681SAndroid Build Coastguard Worker  %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
458*9880d681SAndroid Build Coastguard Worker  %tmp21 = load float, float addrspace(3)* %arrayidx64, align 4
459*9880d681SAndroid Build Coastguard Worker  %add67 = add nsw i32 %y.i, 32
460*9880d681SAndroid Build Coastguard Worker  %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
461*9880d681SAndroid Build Coastguard Worker  %tmp22 = load float, float addrspace(3)* %arrayidx68, align 4
462*9880d681SAndroid Build Coastguard Worker  %add71 = add nsw i32 %y.i, 33
463*9880d681SAndroid Build Coastguard Worker  %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
464*9880d681SAndroid Build Coastguard Worker  %tmp23 = load float, float addrspace(3)* %arrayidx72, align 4
465*9880d681SAndroid Build Coastguard Worker  %add75 = add nsw i32 %y.i, 64
466*9880d681SAndroid Build Coastguard Worker  %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
467*9880d681SAndroid Build Coastguard Worker  %tmp24 = load float, float addrspace(3)* %arrayidx76, align 4
468*9880d681SAndroid Build Coastguard Worker  %add79 = add nsw i32 %y.i, 65
469*9880d681SAndroid Build Coastguard Worker  %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
470*9880d681SAndroid Build Coastguard Worker  %tmp25 = load float, float addrspace(3)* %arrayidx80, align 4
471*9880d681SAndroid Build Coastguard Worker  %sum.0 = fadd float %tmp16, %tmp17
472*9880d681SAndroid Build Coastguard Worker  %sum.1 = fadd float %sum.0, %tmp18
473*9880d681SAndroid Build Coastguard Worker  %sum.2 = fadd float %sum.1, %tmp19
474*9880d681SAndroid Build Coastguard Worker  %sum.3 = fadd float %sum.2, %tmp20
475*9880d681SAndroid Build Coastguard Worker  %sum.4 = fadd float %sum.3, %tmp21
476*9880d681SAndroid Build Coastguard Worker  %sum.5 = fadd float %sum.4, %tmp22
477*9880d681SAndroid Build Coastguard Worker  %sum.6 = fadd float %sum.5, %tmp23
478*9880d681SAndroid Build Coastguard Worker  %sum.7 = fadd float %sum.6, %tmp24
479*9880d681SAndroid Build Coastguard Worker  %sum.8 = fadd float %sum.7, %tmp25
480*9880d681SAndroid Build Coastguard Worker  store float %sum.8, float addrspace(1)* %C, align 4
481*9880d681SAndroid Build Coastguard Worker  ret void
482*9880d681SAndroid Build Coastguard Worker}
483*9880d681SAndroid Build Coastguard Worker
; Loads a <2 x i32> from the addrspace(3) pointer %in with only 4-byte
; alignment and stores it to %out with 8-byte alignment.
; No FileCheck expectations are attached to this function here.
484*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
485*9880d681SAndroid Build Coastguard Worker  %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4
486*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
487*9880d681SAndroid Build Coastguard Worker  ret void
488*9880d681SAndroid Build Coastguard Worker}
489*9880d681SAndroid Build Coastguard Worker
; Loads an i64 from the addrspace(3) pointer %in with only 4-byte alignment
; and stores it to %out with 8-byte alignment.
; No FileCheck expectations are attached to this function here.
490*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
491*9880d681SAndroid Build Coastguard Worker  %load = load i64, i64 addrspace(3)* %in, align 4
492*9880d681SAndroid Build Coastguard Worker  store i64 %load, i64 addrspace(1)* %out, align 8
493*9880d681SAndroid Build Coastguard Worker  ret void
494*9880d681SAndroid Build Coastguard Worker}
495*9880d681SAndroid Build Coastguard Worker
; Intrinsic declarations and attribute groups referenced by the tests.
; NOTE(review): @llvm.amdgcn.workgroup.id.y and @llvm.amdgcn.s.barrier are not
; referenced in the part of the file reviewed here -- they may be used by
; earlier tests; confirm before removing.
496*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
497*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workgroup.id.x() #1
498*9880d681SAndroid Build Coastguard Worker
499*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
500*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workgroup.id.y() #1
501*9880d681SAndroid Build Coastguard Worker
502*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
503*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.x() #1
504*9880d681SAndroid Build Coastguard Worker
505*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
506*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.y() #1
507*9880d681SAndroid Build Coastguard Worker
508*9880d681SAndroid Build Coastguard Worker; Function Attrs: convergent nounwind
509*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.amdgcn.s.barrier() #2
510*9880d681SAndroid Build Coastguard Worker
; Attribute groups: #0 is attached to test function definitions, #1 to the
; id intrinsics and their call sites, #2 to the barrier declaration.
511*9880d681SAndroid Build Coastguard Workerattributes #0 = { nounwind }
512*9880d681SAndroid Build Coastguard Workerattributes #1 = { nounwind readnone }
513*9880d681SAndroid Build Coastguard Workerattributes #2 = { convergent nounwind }
514