; Source: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s

; addrspace(3) arrays indexed directly by the tests that take no %lds pointer
; argument: 512 floats (4-byte aligned) and 512 doubles (8-byte aligned).
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8


; Two float loads from @lds at element indices x and x + 64. The checks expect
; them to be emitted as one ds_read2st64_b32 with offset1:1, and the two result
; registers of that instruction to feed the add that is then stored.
; SI-LABEL: @simple_read2st64_f32_0_1
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Two float loads through the %lds pointer argument at element indices x + 64
; and x + 128. The checks expect one ds_read2st64_b32 with offset0:1 offset1:2.
; SI-LABEL: @simple_read2st64_f32_1_2
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 128
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Two float loads at element indices x + 64 and x + 16320 (16320 = 255 * 64).
; The checks expect a single ds_read2st64_b32 that uses offsets 255 and 1.
; SI-LABEL: @simple_read2st64_f32_max_offset
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:255 offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 16320
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Two float loads at element indices x + 64 and x + 16384 (16384 = 256 * 64,
; i.e. 0x10000 bytes). The checks expect no ds_read2st64_b32; instead two
; separate ds_read_b32 instructions, one with offset:256 and one whose address
; comes from an explicit add of 0x10000.
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}}
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 16384
  %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Two float loads from @lds at element indices x and x + 63. The distance is
; not a multiple of 64, and the checks require that no ds_read2st64_b32 is
; emitted.
; SI-LABEL: @odd_invalid_read2st64_f32_0
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 63
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Two float loads from @lds at element indices x + 64 and x + 127. The first
; index is a multiple of 64 past x but the second is not, and the checks
; require that no ds_read2st64_b32 is emitted.
; SI-LABEL: @odd_invalid_read2st64_f32_1
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %add.x.1 = add nsw i32 %x.i, 127
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
  store float %sum, float addrspace(1)* %out.gep, align 4
  ret void
}

; Double-precision counterpart of the basic case: two 8-byte-aligned double
; loads from @lds.f64 at element indices x and x + 64. The checks expect one
; ds_read2st64_b64 with offset1:1 feeding a v_add_f64.
; SI-LABEL: @simple_read2st64_f64_0_1
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Two 8-byte-aligned double loads through the %lds pointer argument at element
; indices x + 64 and x + 128. The checks expect one ds_read2st64_b64 with
; offset0:1 offset1:2.
; SI-LABEL: @simple_read2st64_f64_1_2
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 128
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Alignment only: the element indices (x and x + 64) are the same as in the
; basic f64 case, but the double loads are only 4-byte aligned. The checks
; expect each double to be read as a pair of dwords with ds_read2_b32: offsets
; 0/1 for the first double and 128/129 for the second (64 doubles = 128 dwords).

; SI-LABEL: @misaligned_read2st64_f64
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 64
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 4
  ret void
}

; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff.
; The loads are at element indices x + 256 (4 * 64) and x + 8128 (127 * 64);
; the checks expect a single ds_read2st64_b64 that uses offsets 127 and 4.
; SI-LABEL: @simple_read2st64_f64_max_offset
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:127 offset1:4
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}, v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 256
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 8128
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Two double loads at element indices x + 64 and x + 8192 (8192 * 8 bytes =
; 0x10000). The checks expect no ds_read2st64_b64; instead one ds_read_b64 with
; offset:512 and one ds_read_b64 whose address comes from an explicit add of
; 0x10000.
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 8192
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; Two double loads at element indices x + 64 and x + 8129. 8129 is
; 127 * 64 + 1, so the second index is not a multiple of 64, and the checks
; require that no ds_read2st64_b64 is emitted.
; SI-LABEL: @invalid_read2st64_f64_odd_offset
; SI-NOT: ds_read2st64_b64
; SI: s_endpgm
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %add.x.0 = add nsw i32 %x.i, 64
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x.1 = add nsw i32 %x.i, 8129
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 8
  ret void
}

; The stride of 8 elements is 8 * 8 = 64 bytes. We need to make sure the
; stride in elements, not bytes, is a multiple of 64 before the st64 form is
; used. Here the loads are at element indices x and x + 8, so the checks expect
; a plain ds_read2_b64 with offset1:8 and no ds_read2st64_b64.

; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: ds_read2st64_b64
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
  %sum = fadd double %val0, %val1
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
  store double %sum, double addrspace(1)* %out.gep, align 4
  ret void
}

; Intrinsic declarations. Every test function above calls workitem.id.x;
; workitem.id.y is declared but not called in this file.
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() #1

; #0 is attached to the test functions, #1 to the intrinsic declarations and
; their call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }