xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/merge-stores.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
2*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
3*9880d681SAndroid Build Coastguard Worker
4*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
5*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
6*9880d681SAndroid Build Coastguard Worker
7*9880d681SAndroid Build Coastguard Worker; Run with devices with different unaligned load restrictions.
8*9880d681SAndroid Build Coastguard Worker
9*9880d681SAndroid Build Coastguard Worker; TODO: Vector element tests
10*9880d681SAndroid Build Coastguard Worker; TODO: Non-zero base offset for load and store combinations
11*9880d681SAndroid Build Coastguard Worker; TODO: Same base addrspacecasted
12*9880d681SAndroid Build Coastguard Worker
13*9880d681SAndroid Build Coastguard Worker
; Two i8 constant stores to adjacent bytes: the first goes to %out + 1, the
; second to %out with an explicit "align 2". The checks below expect two
; buffer_store_byte instructions followed by s_endpgm.
; NOTE(review): the constant 456 does not fit in 8 bits; presumably the IR
; parser truncates it -- confirm this is intentional.
14*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
15*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
16*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
17*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
18*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
19*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
20*9880d681SAndroid Build Coastguard Worker
21*9880d681SAndroid Build Coastguard Worker  store i8 123, i8 addrspace(1)* %out.gep.1
22*9880d681SAndroid Build Coastguard Worker  store i8 456, i8 addrspace(1)* %out, align 2
23*9880d681SAndroid Build Coastguard Worker  ret void
24*9880d681SAndroid Build Coastguard Worker}
25*9880d681SAndroid Build Coastguard Worker
; Same pair of adjacent i8 constant stores as the "align 2" variant, but
; neither store carries an explicit alignment. The checks below expect two
; buffer_store_byte instructions followed by s_endpgm.
; NOTE(review): the constant 456 does not fit in 8 bits; presumably the IR
; parser truncates it -- confirm this is intentional.
26*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i8_natural_align:
27*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
28*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
29*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
30*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
31*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
32*9880d681SAndroid Build Coastguard Worker
33*9880d681SAndroid Build Coastguard Worker  store i8 123, i8 addrspace(1)* %out.gep.1
34*9880d681SAndroid Build Coastguard Worker  store i8 456, i8 addrspace(1)* %out
35*9880d681SAndroid Build Coastguard Worker  ret void
36*9880d681SAndroid Build Coastguard Worker}
37*9880d681SAndroid Build Coastguard Worker
; Two i16 constant stores to adjacent elements (index 1 first, then index 0);
; the store to %out carries an explicit "align 4". The check below expects a
; buffer_store_dword.
38*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i16:
39*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
40*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 {
41*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
42*9880d681SAndroid Build Coastguard Worker
43*9880d681SAndroid Build Coastguard Worker  store i16 123, i16 addrspace(1)* %out.gep.1
44*9880d681SAndroid Build Coastguard Worker  store i16 456, i16 addrspace(1)* %out, align 4
45*9880d681SAndroid Build Coastguard Worker  ret void
46*9880d681SAndroid Build Coastguard Worker}
47*9880d681SAndroid Build Coastguard Worker
; Variant of the two-i16-constant test in which both stored values are zero.
; The store to %out carries "align 4"; the check below expects a
; buffer_store_dword.
48*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16:
49*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
50*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
51*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
52*9880d681SAndroid Build Coastguard Worker
53*9880d681SAndroid Build Coastguard Worker  store i16 0, i16 addrspace(1)* %out.gep.1
54*9880d681SAndroid Build Coastguard Worker  store i16 0, i16 addrspace(1)* %out, align 4
55*9880d681SAndroid Build Coastguard Worker  ret void
56*9880d681SAndroid Build Coastguard Worker}
57*9880d681SAndroid Build Coastguard Worker
; Two adjacent i16 constant stores with no explicit alignment on either
; store. The checks below expect two buffer_store_short instructions
; followed by s_endpgm.
58*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i16_natural_align:
59*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_short
60*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_short
61*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
62*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
63*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
64*9880d681SAndroid Build Coastguard Worker
65*9880d681SAndroid Build Coastguard Worker  store i16 123, i16 addrspace(1)* %out.gep.1
66*9880d681SAndroid Build Coastguard Worker  store i16 456, i16 addrspace(1)* %out
67*9880d681SAndroid Build Coastguard Worker  ret void
68*9880d681SAndroid Build Coastguard Worker}
69*9880d681SAndroid Build Coastguard Worker
; Two i32 constant stores to adjacent elements: 123 to index 1, then 456 to
; index 0. The checks capture the register holding 0x1c8 (456) as LO and the
; register holding 0x7b (123) as HI, then expect one buffer_store_dwordx2 of
; the register range LO:HI.
70*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i32:
71*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
72*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
73*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
74*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
75*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
76*9880d681SAndroid Build Coastguard Worker
77*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(1)* %out.gep.1
78*9880d681SAndroid Build Coastguard Worker  store i32 456, i32 addrspace(1)* %out
79*9880d681SAndroid Build Coastguard Worker  ret void
80*9880d681SAndroid Build Coastguard Worker}
81*9880d681SAndroid Build Coastguard Worker
; Mixed-type pair: float 1.0 is stored to element 1 through a bitcast of the
; i32 pointer, then i32 456 is stored to element 0. The check below expects a
; buffer_store_dwordx2.
82*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32:
83*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2
84*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
85*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
86*9880d681SAndroid Build Coastguard Worker  %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)*
87*9880d681SAndroid Build Coastguard Worker  store float 1.0, float addrspace(1)* %out.gep.1.bc
88*9880d681SAndroid Build Coastguard Worker  store i32 456, i32 addrspace(1)* %out
89*9880d681SAndroid Build Coastguard Worker  ret void
90*9880d681SAndroid Build Coastguard Worker}
91*9880d681SAndroid Build Coastguard Worker
; Mixed-type pair with a float base pointer: i32 123 is stored to element 1
; through a bitcast, then float 4.0 is stored to element 0. The checks capture
; the register holding 4.0 as VLO and the one holding 0x7b (123) as VHI, then
; expect one buffer_store_dwordx2 of the register range VLO:VHI.
92*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
93*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0
94*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b
95*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
96*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
97*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
98*9880d681SAndroid Build Coastguard Worker  %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
99*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(1)* %out.gep.1.bc
100*9880d681SAndroid Build Coastguard Worker  store float 4.0, float addrspace(1)* %out
101*9880d681SAndroid Build Coastguard Worker  ret void
102*9880d681SAndroid Build Coastguard Worker}
103*9880d681SAndroid Build Coastguard Worker
; Four i32 constant stores covering elements 0..3, issued in the order
; 1, 2, 3, 0 with values 123, 456, 333 and 1234. The checks look for the four
; immediates in any order, capturing the register holding 0x4d2 (1234, element
; 0) as LO and the one holding 0x14d (333, element 3) as HI, then expect one
; buffer_store_dwordx4 of the register range LO:HI.
104*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
105*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}}
106*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}}
107*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
108*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
109*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
110*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
111*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
112*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
113*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
114*9880d681SAndroid Build Coastguard Worker
115*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(1)* %out.gep.1
116*9880d681SAndroid Build Coastguard Worker  store i32 456, i32 addrspace(1)* %out.gep.2
117*9880d681SAndroid Build Coastguard Worker  store i32 333, i32 addrspace(1)* %out.gep.3
118*9880d681SAndroid Build Coastguard Worker  store i32 1234, i32 addrspace(1)* %out
119*9880d681SAndroid Build Coastguard Worker  ret void
120*9880d681SAndroid Build Coastguard Worker}
121*9880d681SAndroid Build Coastguard Worker
; Four float constant stores to elements 0..3, issued in ascending address
; order (0, 1, 2, 3). The check below expects a buffer_store_dwordx4.
122*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
123*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
124*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
125*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
126*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
127*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
128*9880d681SAndroid Build Coastguard Worker
129*9880d681SAndroid Build Coastguard Worker  store float 8.0, float addrspace(1)* %out
130*9880d681SAndroid Build Coastguard Worker  store float 1.0, float addrspace(1)* %out.gep.1
131*9880d681SAndroid Build Coastguard Worker  store float 2.0, float addrspace(1)* %out.gep.2
132*9880d681SAndroid Build Coastguard Worker  store float 4.0, float addrspace(1)* %out.gep.3
133*9880d681SAndroid Build Coastguard Worker  ret void
134*9880d681SAndroid Build Coastguard Worker}
135*9880d681SAndroid Build Coastguard Worker
; Same four float constants as the in-order variant, but the store to
; element 0 (%out) is issued last, after the stores to elements 1, 2 and 3.
; The check below still expects a buffer_store_dwordx4.
136*9880d681SAndroid Build Coastguard Worker; First store is out of order.
137*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
138*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
139*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
140*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
141*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
142*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
143*9880d681SAndroid Build Coastguard Worker
144*9880d681SAndroid Build Coastguard Worker  store float 1.0, float addrspace(1)* %out.gep.1
145*9880d681SAndroid Build Coastguard Worker  store float 2.0, float addrspace(1)* %out.gep.2
146*9880d681SAndroid Build Coastguard Worker  store float 4.0, float addrspace(1)* %out.gep.3
147*9880d681SAndroid Build Coastguard Worker  store float 8.0, float addrspace(1)* %out
148*9880d681SAndroid Build Coastguard Worker  ret void
149*9880d681SAndroid Build Coastguard Worker}
150*9880d681SAndroid Build Coastguard Worker
; Four constant stores to elements 0..3 with alternating types: i32 11 to
; element 1 and i32 17 to element 3 (both through bitcasts), float 2.0 to
; element 2 and float 8.0 to element 0. The expectations differ per
; configuration: the invocations without -combiner-alias-analysis expect four
; separate buffer_store_dword instructions, while the invocations with that
; flag expect one buffer_store_dwordx2 followed by two buffer_store_dword.
151*9880d681SAndroid Build Coastguard Worker; FIXME: Should be able to merge this
152*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
153*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
154*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
155*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
156*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
157*9880d681SAndroid Build Coastguard Worker
158*9880d681SAndroid Build Coastguard Worker; GCN-AA: buffer_store_dwordx2
159*9880d681SAndroid Build Coastguard Worker; GCN-AA: buffer_store_dword v
160*9880d681SAndroid Build Coastguard Worker; GCN-AA: buffer_store_dword v
161*9880d681SAndroid Build Coastguard Worker
162*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
163*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
164*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
165*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
166*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
167*9880d681SAndroid Build Coastguard Worker
168*9880d681SAndroid Build Coastguard Worker  %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
169*9880d681SAndroid Build Coastguard Worker  %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)*
170*9880d681SAndroid Build Coastguard Worker
171*9880d681SAndroid Build Coastguard Worker  store i32 11, i32 addrspace(1)* %out.gep.1.bc
172*9880d681SAndroid Build Coastguard Worker  store float 2.0, float addrspace(1)* %out.gep.2
173*9880d681SAndroid Build Coastguard Worker  store i32 17, i32 addrspace(1)* %out.gep.3.bc
174*9880d681SAndroid Build Coastguard Worker  store float 8.0, float addrspace(1)* %out
175*9880d681SAndroid Build Coastguard Worker  ret void
176*9880d681SAndroid Build Coastguard Worker}
177*9880d681SAndroid Build Coastguard Worker
; Three i32 constant stores covering elements 0..2, issued in the order
; 1, 2, 0. The checks expect one buffer_store_dwordx2 and one
; buffer_store_dword (in either order), no further buffer_store_dword after
; them, and then s_endpgm.
178*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
179*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_store_dwordx2
180*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_store_dword
181*9880d681SAndroid Build Coastguard Worker; SI-NOT: buffer_store_dword
182*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
183*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
184*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
185*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
186*9880d681SAndroid Build Coastguard Worker
187*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(1)* %out.gep.1
188*9880d681SAndroid Build Coastguard Worker  store i32 456, i32 addrspace(1)* %out.gep.2
189*9880d681SAndroid Build Coastguard Worker  store i32 1234, i32 addrspace(1)* %out
190*9880d681SAndroid Build Coastguard Worker  ret void
191*9880d681SAndroid Build Coastguard Worker}
192*9880d681SAndroid Build Coastguard Worker
; Two i64 constant stores to adjacent elements: 123 to index 1, then 456 to
; index 0. The check below expects a buffer_store_dwordx4.
193*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
194*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
195*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
196*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
197*9880d681SAndroid Build Coastguard Worker
198*9880d681SAndroid Build Coastguard Worker  store i64 123, i64 addrspace(1)* %out.gep.1
199*9880d681SAndroid Build Coastguard Worker  store i64 456, i64 addrspace(1)* %out
200*9880d681SAndroid Build Coastguard Worker  ret void
201*9880d681SAndroid Build Coastguard Worker}
202*9880d681SAndroid Build Coastguard Worker
; Four i64 constant stores covering elements 0..3, issued in the order
; 1, 2, 3, 0. The checks below expect two buffer_store_dwordx4 instructions.
203*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
204*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
205*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
206*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
207*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
208*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
209*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3
210*9880d681SAndroid Build Coastguard Worker
211*9880d681SAndroid Build Coastguard Worker  store i64 123, i64 addrspace(1)* %out.gep.1
212*9880d681SAndroid Build Coastguard Worker  store i64 456, i64 addrspace(1)* %out.gep.2
213*9880d681SAndroid Build Coastguard Worker  store i64 333, i64 addrspace(1)* %out.gep.3
214*9880d681SAndroid Build Coastguard Worker  store i64 1234, i64 addrspace(1)* %out
215*9880d681SAndroid Build Coastguard Worker  ret void
216*9880d681SAndroid Build Coastguard Worker}
217*9880d681SAndroid Build Coastguard Worker
; Copies two adjacent i32 elements from %in to the same positions in %out:
; both loads are issued first, then both stores. The checks capture the
; destination register range of a buffer_load_dwordx2 as LOAD and expect a
; buffer_store_dwordx2 of that same range.
218*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32:
219*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
220*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2 [[LOAD]]
221*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
222*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
223*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
224*9880d681SAndroid Build Coastguard Worker
225*9880d681SAndroid Build Coastguard Worker  %lo = load i32, i32 addrspace(1)* %in
226*9880d681SAndroid Build Coastguard Worker  %hi = load i32, i32 addrspace(1)* %in.gep.1
227*9880d681SAndroid Build Coastguard Worker
228*9880d681SAndroid Build Coastguard Worker  store i32 %lo, i32 addrspace(1)* %out
229*9880d681SAndroid Build Coastguard Worker  store i32 %hi, i32 addrspace(1)* %out.gep.1
230*9880d681SAndroid Build Coastguard Worker  ret void
231*9880d681SAndroid Build Coastguard Worker}
232*9880d681SAndroid Build Coastguard Worker
; Copies i32 elements 2 and 3 of %in to elements 2 and 3 of %out, so neither
; access starts at the base pointer. The checks expect a buffer_load_dwordx2
; and a buffer_store_dwordx2 of the same register range, each with offset:8
; (element index 2 times 4 bytes).
233*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
234*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
235*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
236*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
237*9880d681SAndroid Build Coastguard Worker  %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
238*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
239*9880d681SAndroid Build Coastguard Worker
240*9880d681SAndroid Build Coastguard Worker  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 2
241*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 3
242*9880d681SAndroid Build Coastguard Worker  %lo = load i32, i32 addrspace(1)* %in.gep.0
243*9880d681SAndroid Build Coastguard Worker  %hi = load i32, i32 addrspace(1)* %in.gep.1
244*9880d681SAndroid Build Coastguard Worker
245*9880d681SAndroid Build Coastguard Worker  store i32 %lo, i32 addrspace(1)* %out.gep.0
246*9880d681SAndroid Build Coastguard Worker  store i32 %hi, i32 addrspace(1)* %out.gep.1
247*9880d681SAndroid Build Coastguard Worker  ret void
248*9880d681SAndroid Build Coastguard Worker}
249*9880d681SAndroid Build Coastguard Worker
; Loads two adjacent i32 elements from %in and stores them to %out swapped:
; the value from element 1 goes to element 0 and vice versa. The checks
; expect two buffer_load_dword and two buffer_store_dword instructions.
250*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32:
251*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword v
252*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword v
253*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
254*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
255*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
256*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
257*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
258*9880d681SAndroid Build Coastguard Worker
259*9880d681SAndroid Build Coastguard Worker  %lo = load i32, i32 addrspace(1)* %in
260*9880d681SAndroid Build Coastguard Worker  %hi = load i32, i32 addrspace(1)* %in.gep.1
261*9880d681SAndroid Build Coastguard Worker
262*9880d681SAndroid Build Coastguard Worker  store i32 %hi, i32 addrspace(1)* %out
263*9880d681SAndroid Build Coastguard Worker  store i32 %lo, i32 addrspace(1)* %out.gep.1
264*9880d681SAndroid Build Coastguard Worker  ret void
265*9880d681SAndroid Build Coastguard Worker}
266*9880d681SAndroid Build Coastguard Worker
; Copies four adjacent i32 elements from %in to the same positions in %out:
; all four loads are issued first, then the four stores in ascending order.
; The checks capture the destination register range of a buffer_load_dwordx4
; as LOAD and expect a buffer_store_dwordx4 of that same range.
267*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32:
268*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
269*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4 [[LOAD]]
270*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
271*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
272*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
273*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
274*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
275*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
276*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
277*9880d681SAndroid Build Coastguard Worker
278*9880d681SAndroid Build Coastguard Worker  %x = load i32, i32 addrspace(1)* %in
279*9880d681SAndroid Build Coastguard Worker  %y = load i32, i32 addrspace(1)* %in.gep.1
280*9880d681SAndroid Build Coastguard Worker  %z = load i32, i32 addrspace(1)* %in.gep.2
281*9880d681SAndroid Build Coastguard Worker  %w = load i32, i32 addrspace(1)* %in.gep.3
282*9880d681SAndroid Build Coastguard Worker
283*9880d681SAndroid Build Coastguard Worker  store i32 %x, i32 addrspace(1)* %out
284*9880d681SAndroid Build Coastguard Worker  store i32 %y, i32 addrspace(1)* %out.gep.1
285*9880d681SAndroid Build Coastguard Worker  store i32 %z, i32 addrspace(1)* %out.gep.2
286*9880d681SAndroid Build Coastguard Worker  store i32 %w, i32 addrspace(1)* %out.gep.3
287*9880d681SAndroid Build Coastguard Worker  ret void
288*9880d681SAndroid Build Coastguard Worker}
289*9880d681SAndroid Build Coastguard Worker
; Copies three adjacent i32 elements from %in to the same positions in %out.
; The checks expect a buffer_load_dwordx2 plus a buffer_load_dword (in either
; order), an s_waitcnt, then a buffer_store_dword plus a
; buffer_store_dwordx2 (in either order) before s_endpgm.
290*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_3_adjacent_loads_i32:
291*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dwordx2
292*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword v
293*9880d681SAndroid Build Coastguard Worker; GCN: s_waitcnt
294*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_store_dword v
295*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_store_dwordx2 v
296*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
297*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
298*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
299*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
300*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
301*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
302*9880d681SAndroid Build Coastguard Worker
303*9880d681SAndroid Build Coastguard Worker  %x = load i32, i32 addrspace(1)* %in
304*9880d681SAndroid Build Coastguard Worker  %y = load i32, i32 addrspace(1)* %in.gep.1
305*9880d681SAndroid Build Coastguard Worker  %z = load i32, i32 addrspace(1)* %in.gep.2
306*9880d681SAndroid Build Coastguard Worker
307*9880d681SAndroid Build Coastguard Worker  store i32 %x, i32 addrspace(1)* %out
308*9880d681SAndroid Build Coastguard Worker  store i32 %y, i32 addrspace(1)* %out.gep.1
309*9880d681SAndroid Build Coastguard Worker  store i32 %z, i32 addrspace(1)* %out.gep.2
310*9880d681SAndroid Build Coastguard Worker  ret void
311*9880d681SAndroid Build Coastguard Worker}
312*9880d681SAndroid Build Coastguard Worker
; Float counterpart of the four-element i32 copy: loads four adjacent float
; elements from %in, then stores them to the same positions in %out. The
; checks capture the destination register range of a buffer_load_dwordx4 as
; LOAD and expect a buffer_store_dwordx4 of that same range.
313*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32:
314*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
315*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4 [[LOAD]]
316*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
317*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
318*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
319*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
320*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr float, float addrspace(1)* %in, i32 1
321*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr float, float addrspace(1)* %in, i32 2
322*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr float, float addrspace(1)* %in, i32 3
323*9880d681SAndroid Build Coastguard Worker
324*9880d681SAndroid Build Coastguard Worker  %x = load float, float addrspace(1)* %in
325*9880d681SAndroid Build Coastguard Worker  %y = load float, float addrspace(1)* %in.gep.1
326*9880d681SAndroid Build Coastguard Worker  %z = load float, float addrspace(1)* %in.gep.2
327*9880d681SAndroid Build Coastguard Worker  %w = load float, float addrspace(1)* %in.gep.3
328*9880d681SAndroid Build Coastguard Worker
329*9880d681SAndroid Build Coastguard Worker  store float %x, float addrspace(1)* %out
330*9880d681SAndroid Build Coastguard Worker  store float %y, float addrspace(1)* %out.gep.1
331*9880d681SAndroid Build Coastguard Worker  store float %z, float addrspace(1)* %out.gep.2
332*9880d681SAndroid Build Coastguard Worker  store float %w, float addrspace(1)* %out.gep.3
333*9880d681SAndroid Build Coastguard Worker  ret void
334*9880d681SAndroid Build Coastguard Worker}
335*9880d681SAndroid Build Coastguard Worker
; Copies i32 elements 11..14 of %in to elements 7..10 of %out, so the source
; and destination start at different non-zero offsets. The checks expect a
; buffer_load_dwordx4 with offset:44 (11 * 4 bytes) and a
; buffer_store_dwordx4 of the same register range with offset:28 (7 * 4 bytes).
336*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
337*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
338*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
339*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
340*9880d681SAndroid Build Coastguard Worker  %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
341*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
342*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13
343*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 14
344*9880d681SAndroid Build Coastguard Worker  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 7
345*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 8
346*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 9
347*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 10
348*9880d681SAndroid Build Coastguard Worker
349*9880d681SAndroid Build Coastguard Worker  %x = load i32, i32 addrspace(1)* %in.gep.0
350*9880d681SAndroid Build Coastguard Worker  %y = load i32, i32 addrspace(1)* %in.gep.1
351*9880d681SAndroid Build Coastguard Worker  %z = load i32, i32 addrspace(1)* %in.gep.2
352*9880d681SAndroid Build Coastguard Worker  %w = load i32, i32 addrspace(1)* %in.gep.3
353*9880d681SAndroid Build Coastguard Worker
354*9880d681SAndroid Build Coastguard Worker  store i32 %x, i32 addrspace(1)* %out.gep.0
355*9880d681SAndroid Build Coastguard Worker  store i32 %y, i32 addrspace(1)* %out.gep.1
356*9880d681SAndroid Build Coastguard Worker  store i32 %z, i32 addrspace(1)* %out.gep.2
357*9880d681SAndroid Build Coastguard Worker  store i32 %w, i32 addrspace(1)* %out.gep.3
358*9880d681SAndroid Build Coastguard Worker  ret void
359*9880d681SAndroid Build Coastguard Worker}
360*9880d681SAndroid Build Coastguard Worker
; Loads four adjacent i32 elements from %in in ascending order, calls
; @llvm.amdgcn.s.barrier, then stores each value to its matching position in
; %out in descending address order (element 3 first, element 0 last). The
; checks expect a buffer_load_dwordx4, then s_barrier, then a
; buffer_store_dwordx4 of the same register range.
361*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_inverse_i32:
362*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
363*9880d681SAndroid Build Coastguard Worker; GCN: s_barrier
364*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4 [[LOAD]]
365*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
366*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
367*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
368*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
369*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
370*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
371*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
372*9880d681SAndroid Build Coastguard Worker
373*9880d681SAndroid Build Coastguard Worker  %x = load i32, i32 addrspace(1)* %in
374*9880d681SAndroid Build Coastguard Worker  %y = load i32, i32 addrspace(1)* %in.gep.1
375*9880d681SAndroid Build Coastguard Worker  %z = load i32, i32 addrspace(1)* %in.gep.2
376*9880d681SAndroid Build Coastguard Worker  %w = load i32, i32 addrspace(1)* %in.gep.3
377*9880d681SAndroid Build Coastguard Worker
378*9880d681SAndroid Build Coastguard Worker  ; Make sure the barrier doesn't stop this
379*9880d681SAndroid Build Coastguard Worker  tail call void @llvm.amdgcn.s.barrier() #1
380*9880d681SAndroid Build Coastguard Worker
381*9880d681SAndroid Build Coastguard Worker  store i32 %w, i32 addrspace(1)* %out.gep.3
382*9880d681SAndroid Build Coastguard Worker  store i32 %z, i32 addrspace(1)* %out.gep.2
383*9880d681SAndroid Build Coastguard Worker  store i32 %y, i32 addrspace(1)* %out.gep.1
384*9880d681SAndroid Build Coastguard Worker  store i32 %x, i32 addrspace(1)* %out
385*9880d681SAndroid Build Coastguard Worker
386*9880d681SAndroid Build Coastguard Worker  ret void
387*9880d681SAndroid Build Coastguard Worker}
388*9880d681SAndroid Build Coastguard Worker
; Loads four consecutive i32 values from %in and stores them to %out in
; reversed element order (%w goes to %out, %x goes to %out + 3). The checks
; expect the accesses to stay separate: four dword loads, s_barrier, then four
; dword stores.
389*9880d681SAndroid Build Coastguard Worker; TODO: Re-packing of loaded register required. Maybe an IR pass
390*9880d681SAndroid Build Coastguard Worker; should catch this?
391*9880d681SAndroid Build Coastguard Worker
392*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_shuffle_i32:
393*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword v
394*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword v
395*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword v
396*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword v
397*9880d681SAndroid Build Coastguard Worker; GCN: s_barrier
398*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
399*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
400*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
401*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
402*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
403*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
404*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
405*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
406*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
407*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
408*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
409*9880d681SAndroid Build Coastguard Worker
410*9880d681SAndroid Build Coastguard Worker  %x = load i32, i32 addrspace(1)* %in
411*9880d681SAndroid Build Coastguard Worker  %y = load i32, i32 addrspace(1)* %in.gep.1
412*9880d681SAndroid Build Coastguard Worker  %z = load i32, i32 addrspace(1)* %in.gep.2
413*9880d681SAndroid Build Coastguard Worker  %w = load i32, i32 addrspace(1)* %in.gep.3
414*9880d681SAndroid Build Coastguard Worker
415*9880d681SAndroid Build Coastguard Worker  ; Barrier between the loads and the stores; the checks expect no merging here
416*9880d681SAndroid Build Coastguard Worker  tail call void @llvm.amdgcn.s.barrier() #1
417*9880d681SAndroid Build Coastguard Worker
418*9880d681SAndroid Build Coastguard Worker  store i32 %w, i32 addrspace(1)* %out
419*9880d681SAndroid Build Coastguard Worker  store i32 %z, i32 addrspace(1)* %out.gep.1
420*9880d681SAndroid Build Coastguard Worker  store i32 %y, i32 addrspace(1)* %out.gep.2
421*9880d681SAndroid Build Coastguard Worker  store i32 %x, i32 addrspace(1)* %out.gep.3
422*9880d681SAndroid Build Coastguard Worker
423*9880d681SAndroid Build Coastguard Worker  ret void
424*9880d681SAndroid Build Coastguard Worker}
425*9880d681SAndroid Build Coastguard Worker
; Copies four consecutive bytes from %in to %out. Only the first load and the
; first store carry an explicit "align 4"; the other accesses have no explicit
; alignment. The checks expect a single dword load and a single dword store of
; the loaded register.
426*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8:
427*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
428*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[LOAD]]
429*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
430*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
431*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
432*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
433*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
434*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
435*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
436*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
437*9880d681SAndroid Build Coastguard Worker
438*9880d681SAndroid Build Coastguard Worker  %x = load i8, i8 addrspace(1)* %in, align 4
439*9880d681SAndroid Build Coastguard Worker  %y = load i8, i8 addrspace(1)* %in.gep.1
440*9880d681SAndroid Build Coastguard Worker  %z = load i8, i8 addrspace(1)* %in.gep.2
441*9880d681SAndroid Build Coastguard Worker  %w = load i8, i8 addrspace(1)* %in.gep.3
442*9880d681SAndroid Build Coastguard Worker
443*9880d681SAndroid Build Coastguard Worker  store i8 %x, i8 addrspace(1)* %out, align 4
444*9880d681SAndroid Build Coastguard Worker  store i8 %y, i8 addrspace(1)* %out.gep.1
445*9880d681SAndroid Build Coastguard Worker  store i8 %z, i8 addrspace(1)* %out.gep.2
446*9880d681SAndroid Build Coastguard Worker  store i8 %w, i8 addrspace(1)* %out.gep.3
447*9880d681SAndroid Build Coastguard Worker  ret void
448*9880d681SAndroid Build Coastguard Worker}
449*9880d681SAndroid Build Coastguard Worker
; Same four-byte copy as merge_global_store_4_adjacent_loads_i8, but none of
; the loads or stores has an explicit alignment. The checks expect the accesses
; to stay separate: four ubyte loads followed by four byte stores.
450*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8_natural_align:
451*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_ubyte
452*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_ubyte
453*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_ubyte
454*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_ubyte
455*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
456*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
457*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
458*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_byte
459*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
460*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
461*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
462*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
463*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
464*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
465*9880d681SAndroid Build Coastguard Worker  %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
466*9880d681SAndroid Build Coastguard Worker  %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
467*9880d681SAndroid Build Coastguard Worker
468*9880d681SAndroid Build Coastguard Worker  %x = load i8, i8 addrspace(1)* %in
469*9880d681SAndroid Build Coastguard Worker  %y = load i8, i8 addrspace(1)* %in.gep.1
470*9880d681SAndroid Build Coastguard Worker  %z = load i8, i8 addrspace(1)* %in.gep.2
471*9880d681SAndroid Build Coastguard Worker  %w = load i8, i8 addrspace(1)* %in.gep.3
472*9880d681SAndroid Build Coastguard Worker
473*9880d681SAndroid Build Coastguard Worker  store i8 %x, i8 addrspace(1)* %out
474*9880d681SAndroid Build Coastguard Worker  store i8 %y, i8 addrspace(1)* %out.gep.1
475*9880d681SAndroid Build Coastguard Worker  store i8 %z, i8 addrspace(1)* %out.gep.2
476*9880d681SAndroid Build Coastguard Worker  store i8 %w, i8 addrspace(1)* %out.gep.3
477*9880d681SAndroid Build Coastguard Worker  ret void
478*9880d681SAndroid Build Coastguard Worker}
479*9880d681SAndroid Build Coastguard Worker
; Loads one <4 x i32> from %in, extracts each element, and stores the four
; elements to consecutive i32 slots of %out. Every run expects a dwordx4 load.
; The runs without -combiner-alias-analysis (NOAA prefix) expect four dword
; stores; the runs with it (AA prefix) expect one dwordx4 store of the loaded
; register tuple.
480*9880d681SAndroid Build Coastguard Worker; This works once AA is enabled on the subtarget
481*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
482*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
483*9880d681SAndroid Build Coastguard Worker
484*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
485*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
486*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
487*9880d681SAndroid Build Coastguard Worker; GCN-NOAA: buffer_store_dword v
488*9880d681SAndroid Build Coastguard Worker
489*9880d681SAndroid Build Coastguard Worker; GCN-AA: buffer_store_dwordx4 [[LOAD]]
490*9880d681SAndroid Build Coastguard Worker
491*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
492*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
493*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
494*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
495*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
496*9880d681SAndroid Build Coastguard Worker  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %in
497*9880d681SAndroid Build Coastguard Worker
498*9880d681SAndroid Build Coastguard Worker  %x = extractelement <4 x i32> %vec, i32 0
499*9880d681SAndroid Build Coastguard Worker  %y = extractelement <4 x i32> %vec, i32 1
500*9880d681SAndroid Build Coastguard Worker  %z = extractelement <4 x i32> %vec, i32 2
501*9880d681SAndroid Build Coastguard Worker  %w = extractelement <4 x i32> %vec, i32 3
502*9880d681SAndroid Build Coastguard Worker
503*9880d681SAndroid Build Coastguard Worker  store i32 %x, i32 addrspace(1)* %out
504*9880d681SAndroid Build Coastguard Worker  store i32 %y, i32 addrspace(1)* %out.gep.1
505*9880d681SAndroid Build Coastguard Worker  store i32 %z, i32 addrspace(1)* %out.gep.2
506*9880d681SAndroid Build Coastguard Worker  store i32 %w, i32 addrspace(1)* %out.gep.3
507*9880d681SAndroid Build Coastguard Worker  ret void
508*9880d681SAndroid Build Coastguard Worker}
509*9880d681SAndroid Build Coastguard Worker
; Stores two i8 constants to adjacent bytes in addrspace(3); the store to %out
; is marked "align 2". The checks expect the stores to stay separate as two
; ds_write_b8 instructions.
; NOTE(review): the literal 456 does not fit in i8; presumably the IR parser
; truncates it (to 200) - confirm this is intentional. The checks do not look
; at the stored values.
510*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
511*9880d681SAndroid Build Coastguard Worker; GCN: ds_write_b8
512*9880d681SAndroid Build Coastguard Worker; GCN: ds_write_b8
513*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
514*9880d681SAndroid Build Coastguard Workerdefine void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
515*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1
516*9880d681SAndroid Build Coastguard Worker
517*9880d681SAndroid Build Coastguard Worker  store i8 123, i8 addrspace(3)* %out.gep.1
518*9880d681SAndroid Build Coastguard Worker  store i8 456, i8 addrspace(3)* %out, align 2
519*9880d681SAndroid Build Coastguard Worker  ret void
520*9880d681SAndroid Build Coastguard Worker}
521*9880d681SAndroid Build Coastguard Worker
; Stores i32 constants 456 (0x1c8) to %out and 123 (0x7b) to %out + 1 in
; addrspace(3). The checks expect both constants to be materialized with
; v_mov_b32 and written by a single ds_write2_b32 with offset1:1.
522*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
523*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
524*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
525*9880d681SAndroid Build Coastguard Worker; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
526*9880d681SAndroid Build Coastguard Workerdefine void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
527*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
528*9880d681SAndroid Build Coastguard Worker
529*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(3)* %out.gep.1
530*9880d681SAndroid Build Coastguard Worker  store i32 456, i32 addrspace(3)* %out
531*9880d681SAndroid Build Coastguard Worker  ret void
532*9880d681SAndroid Build Coastguard Worker}
533*9880d681SAndroid Build Coastguard Worker
; Stores four i32 constants to consecutive addrspace(3) elements 0..3 of %out:
; 1234 (0x4d2), 123 (0x7b), 456 (0x1c8) and 333 (0x14d). The checks expect two
; ds_write2_b32 instructions, one covering elements 0 and 1 and one covering
; elements 2 and 3; the DAG-style checks do not fix their relative order.
534*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_local_store_4_constants_i32:
535*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 [[K2:v[0-9]+]], 0x1c8
536*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 [[K3:v[0-9]+]], 0x14d
537*9880d681SAndroid Build Coastguard Worker; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K2]], [[K3]] offset0:2 offset1:3
538*9880d681SAndroid Build Coastguard Worker
539*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0x4d2
540*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x7b
541*9880d681SAndroid Build Coastguard Worker; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1
542*9880d681SAndroid Build Coastguard Worker
543*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
544*9880d681SAndroid Build Coastguard Workerdefine void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
545*9880d681SAndroid Build Coastguard Worker  %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
546*9880d681SAndroid Build Coastguard Worker  %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
547*9880d681SAndroid Build Coastguard Worker  %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3
548*9880d681SAndroid Build Coastguard Worker
549*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(3)* %out.gep.1
550*9880d681SAndroid Build Coastguard Worker  store i32 456, i32 addrspace(3)* %out.gep.2
551*9880d681SAndroid Build Coastguard Worker  store i32 333, i32 addrspace(3)* %out.gep.3
552*9880d681SAndroid Build Coastguard Worker  store i32 1234, i32 addrspace(3)* %out
553*9880d681SAndroid Build Coastguard Worker  ret void
554*9880d681SAndroid Build Coastguard Worker}
555*9880d681SAndroid Build Coastguard Worker
; Stores five i32 constants (9, 12, 16, -12, 11) to consecutive elements of
; %out, each with align 4. The checks expect one dwordx4 store whose register
; tuple runs from the register holding 9 to the register holding -12, followed
; by a single dword store of the register holding 11.
556*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
557*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
558*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
559*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
560*9880d681SAndroid Build Coastguard Worker; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
561*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v[[HI]]
562*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
563*9880d681SAndroid Build Coastguard Worker  store i32 9, i32 addrspace(1)* %out, align 4
564*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
565*9880d681SAndroid Build Coastguard Worker  store i32 12, i32 addrspace(1)* %idx1, align 4
566*9880d681SAndroid Build Coastguard Worker  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
567*9880d681SAndroid Build Coastguard Worker  store i32 16, i32 addrspace(1)* %idx2, align 4
568*9880d681SAndroid Build Coastguard Worker  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
569*9880d681SAndroid Build Coastguard Worker  store i32 -12, i32 addrspace(1)* %idx3, align 4
570*9880d681SAndroid Build Coastguard Worker  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
571*9880d681SAndroid Build Coastguard Worker  store i32 11, i32 addrspace(1)* %idx4, align 4
572*9880d681SAndroid Build Coastguard Worker  ret void
573*9880d681SAndroid Build Coastguard Worker}
574*9880d681SAndroid Build Coastguard Worker
; Stores six i32 constants to consecutive elements of %out, each with align 4.
; The checks expect a dwordx4 store followed by a dwordx2 store.
575*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_6_constants_i32:
576*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
577*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2
578*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) {
579*9880d681SAndroid Build Coastguard Worker  store i32 13, i32 addrspace(1)* %out, align 4
580*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
581*9880d681SAndroid Build Coastguard Worker  store i32 15, i32 addrspace(1)* %idx1, align 4
582*9880d681SAndroid Build Coastguard Worker  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
583*9880d681SAndroid Build Coastguard Worker  store i32 62, i32 addrspace(1)* %idx2, align 4
584*9880d681SAndroid Build Coastguard Worker  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
585*9880d681SAndroid Build Coastguard Worker  store i32 63, i32 addrspace(1)* %idx3, align 4
586*9880d681SAndroid Build Coastguard Worker  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
587*9880d681SAndroid Build Coastguard Worker  store i32 11, i32 addrspace(1)* %idx4, align 4
588*9880d681SAndroid Build Coastguard Worker  %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
589*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(1)* %idx5, align 4
590*9880d681SAndroid Build Coastguard Worker  ret void
591*9880d681SAndroid Build Coastguard Worker}
592*9880d681SAndroid Build Coastguard Worker
; Stores seven i32 constants to consecutive elements of %out, each with
; align 4. The checks expect a dwordx4 store, then a dwordx2 store, then a
; single dword store.
593*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_7_constants_i32:
594*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
595*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx2
596*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword v
597*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
598*9880d681SAndroid Build Coastguard Worker  store i32 34, i32 addrspace(1)* %out, align 4
599*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
600*9880d681SAndroid Build Coastguard Worker  store i32 999, i32 addrspace(1)* %idx1, align 4
601*9880d681SAndroid Build Coastguard Worker  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
602*9880d681SAndroid Build Coastguard Worker  store i32 65, i32 addrspace(1)* %idx2, align 4
603*9880d681SAndroid Build Coastguard Worker  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
604*9880d681SAndroid Build Coastguard Worker  store i32 33, i32 addrspace(1)* %idx3, align 4
605*9880d681SAndroid Build Coastguard Worker  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
606*9880d681SAndroid Build Coastguard Worker  store i32 98, i32 addrspace(1)* %idx4, align 4
607*9880d681SAndroid Build Coastguard Worker  %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
608*9880d681SAndroid Build Coastguard Worker  store i32 91, i32 addrspace(1)* %idx5, align 4
609*9880d681SAndroid Build Coastguard Worker  %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
610*9880d681SAndroid Build Coastguard Worker  store i32 212, i32 addrspace(1)* %idx6, align 4
611*9880d681SAndroid Build Coastguard Worker  ret void
612*9880d681SAndroid Build Coastguard Worker}
613*9880d681SAndroid Build Coastguard Worker
; Stores eight i32 constants to consecutive elements of %out, each with
; align 4. The checks expect two dwordx4 stores before s_endpgm.
614*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
615*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
616*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dwordx4
617*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
618*9880d681SAndroid Build Coastguard Workerdefine void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
619*9880d681SAndroid Build Coastguard Worker  store i32 34, i32 addrspace(1)* %out, align 4
620*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
621*9880d681SAndroid Build Coastguard Worker  store i32 999, i32 addrspace(1)* %idx1, align 4
622*9880d681SAndroid Build Coastguard Worker  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
623*9880d681SAndroid Build Coastguard Worker  store i32 65, i32 addrspace(1)* %idx2, align 4
624*9880d681SAndroid Build Coastguard Worker  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
625*9880d681SAndroid Build Coastguard Worker  store i32 33, i32 addrspace(1)* %idx3, align 4
626*9880d681SAndroid Build Coastguard Worker  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
627*9880d681SAndroid Build Coastguard Worker  store i32 98, i32 addrspace(1)* %idx4, align 4
628*9880d681SAndroid Build Coastguard Worker  %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
629*9880d681SAndroid Build Coastguard Worker  store i32 91, i32 addrspace(1)* %idx5, align 4
630*9880d681SAndroid Build Coastguard Worker  %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
631*9880d681SAndroid Build Coastguard Worker  store i32 212, i32 addrspace(1)* %idx6, align 4
632*9880d681SAndroid Build Coastguard Worker  %idx7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7
633*9880d681SAndroid Build Coastguard Worker  store i32 999, i32 addrspace(1)* %idx7, align 4
634*9880d681SAndroid Build Coastguard Worker  ret void
635*9880d681SAndroid Build Coastguard Worker}
636*9880d681SAndroid Build Coastguard Worker
; Loads a <3 x i32> from %in with align 4 and stores it to %out; both pointers
; are noalias. The checks expect, for the load and again for the store, a
; dwordx2 access at offset 0 plus a dword access at offset:8. They also expect
; no "offen" addressing around the s_waitcnt, no SCRATCH_RSRC_DWORD before the
; loads, and a reported ScratchSize of 0.
; NOTE(review): the v2i64 remark below presumably concerns the v3i64/v3f64
; copies further down - confirm.
637*9880d681SAndroid Build Coastguard Worker; This requires handling of scalar_to_vector for v2i64 to avoid
638*9880d681SAndroid Build Coastguard Worker; scratch usage.
639*9880d681SAndroid Build Coastguard Worker; FIXME: Should do single load and store
640*9880d681SAndroid Build Coastguard Worker
641*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}copy_v3i32_align4:
642*9880d681SAndroid Build Coastguard Worker; GCN-NOT: SCRATCH_RSRC_DWORD
643*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
644*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
645*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
646*9880d681SAndroid Build Coastguard Worker; GCN: s_waitcnt vmcnt
647*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
648*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
649*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
650*9880d681SAndroid Build Coastguard Worker
651*9880d681SAndroid Build Coastguard Worker; GCN: ScratchSize: 0{{$}}
652*9880d681SAndroid Build Coastguard Workerdefine void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
653*9880d681SAndroid Build Coastguard Worker  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
654*9880d681SAndroid Build Coastguard Worker  store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
655*9880d681SAndroid Build Coastguard Worker  ret void
656*9880d681SAndroid Build Coastguard Worker}
657*9880d681SAndroid Build Coastguard Worker
; Loads a <3 x i64> from %in with align 4 and stores it to %out; both pointers
; are noalias. The checks expect, for the load and again for the store, a
; dwordx4 access at offset 0 plus a dwordx2 access at offset:16. They also
; expect no "offen" addressing around the s_waitcnt, no SCRATCH_RSRC_DWORD
; before the loads, and a reported ScratchSize of 0.
658*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}copy_v3i64_align4:
659*9880d681SAndroid Build Coastguard Worker; GCN-NOT: SCRATCH_RSRC_DWORD
660*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
661*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
662*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
663*9880d681SAndroid Build Coastguard Worker; GCN: s_waitcnt vmcnt
664*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
665*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
666*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
667*9880d681SAndroid Build Coastguard Worker; GCN: ScratchSize: 0{{$}}
668*9880d681SAndroid Build Coastguard Workerdefine void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
669*9880d681SAndroid Build Coastguard Worker  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
670*9880d681SAndroid Build Coastguard Worker  store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
671*9880d681SAndroid Build Coastguard Worker  ret void
672*9880d681SAndroid Build Coastguard Worker}
673*9880d681SAndroid Build Coastguard Worker
; Loads a <3 x float> from %in with align 4, adds <1.0, 2.0, 4.0> to it, and
; stores the sum to %out; both pointers are noalias. The checks expect, for the
; load and again for the store, a dwordx2 access at offset 0 plus a dword
; access at offset:8. They also expect no "offen" addressing around the
; s_waitcnt, no SCRATCH_RSRC_DWORD before the loads, and a reported
; ScratchSize of 0.
674*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}copy_v3f32_align4:
675*9880d681SAndroid Build Coastguard Worker; GCN-NOT: SCRATCH_RSRC_DWORD
676*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
677*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
678*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
679*9880d681SAndroid Build Coastguard Worker; GCN: s_waitcnt vmcnt
680*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
681*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
682*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
683*9880d681SAndroid Build Coastguard Worker; GCN: ScratchSize: 0{{$}}
684*9880d681SAndroid Build Coastguard Workerdefine void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
685*9880d681SAndroid Build Coastguard Worker  %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
686*9880d681SAndroid Build Coastguard Worker  %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
687*9880d681SAndroid Build Coastguard Worker  store <3 x float> %fadd, <3 x float> addrspace(1)* %out
688*9880d681SAndroid Build Coastguard Worker  ret void
689*9880d681SAndroid Build Coastguard Worker}
690*9880d681SAndroid Build Coastguard Worker
; Loads a <3 x double> from %in with align 4, adds <1.0, 2.0, 4.0> to it, and
; stores the sum to %out; both pointers are noalias. The checks expect, for the
; load and again for the store, a dwordx4 access at offset 0 plus a dwordx2
; access at offset:16. They also expect no "offen" addressing around the
; s_waitcnt, no SCRATCH_RSRC_DWORD before the loads, and a reported
; ScratchSize of 0.
691*9880d681SAndroid Build Coastguard Worker; GCN-LABEL: {{^}}copy_v3f64_align4:
692*9880d681SAndroid Build Coastguard Worker; GCN-NOT: SCRATCH_RSRC_DWORD
693*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
694*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
695*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
696*9880d681SAndroid Build Coastguard Worker; GCN: s_waitcnt vmcnt
697*9880d681SAndroid Build Coastguard Worker; GCN-NOT: offen
698*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
699*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
700*9880d681SAndroid Build Coastguard Worker; GCN: ScratchSize: 0{{$}}
701*9880d681SAndroid Build Coastguard Workerdefine void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
702*9880d681SAndroid Build Coastguard Worker  %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
703*9880d681SAndroid Build Coastguard Worker  %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
704*9880d681SAndroid Build Coastguard Worker  store <3 x double> %fadd, <3 x double> addrspace(1)* %out
705*9880d681SAndroid Build Coastguard Worker  ret void
706*9880d681SAndroid Build Coastguard Worker}
707*9880d681SAndroid Build Coastguard Worker
; Declaration of the barrier intrinsic that the barrier tests call between
; their loads and stores, followed by the attribute groups used in this file:
; #0 (nounwind) is attached to the test functions that reference it, and
; #1 (convergent nounwind) to the barrier declaration and its call sites.
708*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.amdgcn.s.barrier() #1
709*9880d681SAndroid Build Coastguard Worker
710*9880d681SAndroid Build Coastguard Workerattributes #0 = { nounwind }
711*9880d681SAndroid Build Coastguard Workerattributes #1 = { convergent nounwind }
712