// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -fallow-half-arguments-and-returns -ffp-contract=fast -S -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}
// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}
// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  // test_vmul_p8
  return vmul_p8(v1, v2);
  // pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  // test_vmulq_p8
  return vmulq_p8(v1, v2);
  // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}


// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// NOTE: deliberately declared to return int8x8_t (not int16x4_t); the implicit
// vector conversion is what the trailing bitcast CHECK line verifies.
// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// NOTE: deliberately declared to return int8x8_t (not int16x4_t); the implicit
// vector conversion is what the trailing bitcast CHECK line verifies.
// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}
// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}
// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}
638 // CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
639 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
640 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
641 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
642 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
643 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
644 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
645 // CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
646 // CHECK: ret <2 x float> [[TMP6]]
test_vfma_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)647 float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
648 return vfma_f32(v1, v2, v3);
649 }
650
651 // CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
652 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
653 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
654 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
655 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
656 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
657 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
658 // CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
659 // CHECK: ret <4 x float> [[TMP6]]
// vfmaq_f32 must lower to llvm.fma.v4f32 with the accumulator last (see CHECK lines above).
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}
663
664 // CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
665 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
666 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
667 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
668 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
669 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
670 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
671 // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
672 // CHECK: ret <2 x double> [[TMP6]]
// vfmaq_f64 must lower to llvm.fma.v2f64 with the accumulator last (see CHECK lines above).
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}
676 // CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
677 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
678 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
679 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
680 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
681 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
682 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
683 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
684 // CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
685 // CHECK: ret <2 x float> [[TMP6]]
// vfms_f32: fused multiply-subtract is expressed as fma with v2 negated
// (fsub from -0.0 before the llvm.fma.v2f32 call — see CHECK lines above).
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}
689
690 // CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
691 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
692 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
693 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
694 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
695 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
696 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
697 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
698 // CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
699 // CHECK: ret <4 x float> [[TMP6]]
// vfmsq_f32: negate v2 (fsub from -0.0) then llvm.fma.v4f32 (see CHECK lines above).
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}
703
704 // CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
705 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
706 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
707 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
708 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
709 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
710 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
711 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
712 // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
713 // CHECK: ret <2 x double> [[TMP6]]
// vfmsq_f64: negate v2 (fsub from -0.0) then llvm.fma.v2f64 (see CHECK lines above).
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}
717
718 // CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
719 // CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
720 // CHECK: ret <2 x double> [[DIV_I]]
// vdivq_f64 must lower to a plain fdiv on <2 x double> (see CHECK lines above).
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}
724
725 // CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
726 // CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
727 // CHECK: ret <4 x float> [[DIV_I]]
// vdivq_f32 must lower to a plain fdiv on <4 x float> (see CHECK lines above).
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}
731
732 // CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 {
733 // CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
734 // CHECK: ret <2 x float> [[DIV_I]]
// vdiv_f32 must lower to a plain fdiv on <2 x float> (see CHECK lines above).
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}
738
739 // CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
740 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
741 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
742 // CHECK: ret <8 x i8> [[ADD_I]]
// vaba_s8 (absolute difference and accumulate) lowers to
// llvm.aarch64.neon.sabd.v8i8 followed by add (see CHECK lines above).
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}
746
747 // CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
748 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
749 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
750 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
751 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
752 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
753 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
754 // CHECK: ret <4 x i16> [[ADD_I]]
// vaba_s16 lowers to sabd.v4i16 + add; the bitcast round-trips through <8 x i8>
// are the generic-vector argument shuffles (see CHECK lines above).
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}
758
759 // CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
760 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
761 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
762 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
763 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
764 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
765 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
766 // CHECK: ret <2 x i32> [[ADD_I]]
// vaba_s32 lowers to sabd.v2i32 + add (see CHECK lines above).
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}
770
771 // CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
772 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
773 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
774 // CHECK: ret <8 x i8> [[ADD_I]]
// vaba_u8 lowers to the unsigned variant uabd.v8i8 + add (see CHECK lines above).
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}
778
779 // CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
780 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
781 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
782 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
783 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
784 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
785 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
786 // CHECK: ret <4 x i16> [[ADD_I]]
// vaba_u16 lowers to uabd.v4i16 + add (see CHECK lines above).
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}
790
791 // CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
792 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
793 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
794 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
795 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
796 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
797 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
798 // CHECK: ret <2 x i32> [[ADD_I]]
// vaba_u32 lowers to uabd.v2i32 + add (see CHECK lines above).
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}
802
803 // CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
804 // CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
805 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
806 // CHECK: ret <16 x i8> [[ADD_I]]
// 128-bit vabaq_s8 lowers to sabd.v16i8 + add (see CHECK lines above).
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}
810
811 // CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
813 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
814 // CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
815 // CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
816 // CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
817 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
818 // CHECK: ret <8 x i16> [[ADD_I]]
// vabaq_s16 lowers to sabd.v8i16 + add (see CHECK lines above).
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}
822
823 // CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
824 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
825 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
826 // CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
827 // CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
828 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
829 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
830 // CHECK: ret <4 x i32> [[ADD_I]]
// vabaq_s32 lowers to sabd.v4i32 + add (see CHECK lines above).
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}
834
835 // CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
836 // CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
837 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
838 // CHECK: ret <16 x i8> [[ADD_I]]
// vabaq_u8 lowers to uabd.v16i8 + add (see CHECK lines above).
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}
842
843 // CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
844 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
845 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
846 // CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
847 // CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
848 // CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
849 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
850 // CHECK: ret <8 x i16> [[ADD_I]]
// vabaq_u16 lowers to uabd.v8i16 + add (see CHECK lines above).
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}
854
855 // CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
856 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
857 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
858 // CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
859 // CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
860 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
861 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
862 // CHECK: ret <4 x i32> [[ADD_I]]
// vabaq_u32 lowers to uabd.v4i32 + add (see CHECK lines above).
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}
866
867 // CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
868 // CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
869 // CHECK: ret <8 x i8> [[VABD_I]]
// vabd_s8 (absolute difference) lowers directly to sabd.v8i8 (see CHECK lines above).
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}
873
874 // CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
875 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
876 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
877 // CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
878 // CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
879 // CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
880 // CHECK: ret <4 x i16> [[VABD2_I]]
// vabd_s16 lowers to sabd.v4i16 (see CHECK lines above).
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}
884
885 // CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
886 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
887 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
888 // CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
889 // CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
890 // CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
891 // CHECK: ret <2 x i32> [[VABD2_I]]
// vabd_s32 lowers to sabd.v2i32 (see CHECK lines above).
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}
895
896 // CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
897 // CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
898 // CHECK: ret <8 x i8> [[VABD_I]]
// vabd_u8 lowers to uabd.v8i8 (see CHECK lines above).
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}
902
903 // CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
904 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
905 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
906 // CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
907 // CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
908 // CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
909 // CHECK: ret <4 x i16> [[VABD2_I]]
// vabd_u16 lowers to uabd.v4i16 (see CHECK lines above).
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}
913
914 // CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
915 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
916 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
917 // CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
918 // CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
919 // CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
920 // CHECK: ret <2 x i32> [[VABD2_I]]
// vabd_u32 lowers to uabd.v2i32 (see CHECK lines above).
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}
924
925 // CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
926 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
927 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
928 // CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
929 // CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
930 // CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
931 // CHECK: ret <2 x float> [[VABD2_I]]
// Floating-point vabd_f32 lowers to the fabd.v2f32 intrinsic (see CHECK lines above).
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}
935
936 // CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
937 // CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
938 // CHECK: ret <16 x i8> [[VABD_I]]
// 128-bit vabdq_s8 lowers to sabd.v16i8 (see CHECK lines above).
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}
942
943 // CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
944 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
945 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
946 // CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
947 // CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
948 // CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
949 // CHECK: ret <8 x i16> [[VABD2_I]]
// vabdq_s16 lowers to sabd.v8i16 (see CHECK lines above).
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}
953
954 // CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
955 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
956 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
957 // CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
958 // CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
959 // CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
960 // CHECK: ret <4 x i32> [[VABD2_I]]
// vabdq_s32 lowers to sabd.v4i32 (see CHECK lines above).
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}
964
965 // CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
966 // CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
967 // CHECK: ret <16 x i8> [[VABD_I]]
// vabdq_u8 lowers to uabd.v16i8 (see CHECK lines above).
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}
971
972 // CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
973 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
974 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
975 // CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
976 // CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
977 // CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
978 // CHECK: ret <8 x i16> [[VABD2_I]]
// vabdq_u16 lowers to uabd.v8i16 (see CHECK lines above).
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}
982
983 // CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
984 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
985 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
986 // CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
987 // CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
988 // CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
989 // CHECK: ret <4 x i32> [[VABD2_I]]
// vabdq_u32 lowers to uabd.v4i32 (see CHECK lines above).
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}
993
994 // CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
995 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
996 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
997 // CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
998 // CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
999 // CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
1000 // CHECK: ret <4 x float> [[VABD2_I]]
// vabdq_f32 lowers to fabd.v4f32 (see CHECK lines above).
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}
1004
1005 // CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1006 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1007 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1008 // CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1009 // CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1010 // CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4
1011 // CHECK: ret <2 x double> [[VABD2_I]]
// vabdq_f64 lowers to fabd.v2f64 (see CHECK lines above).
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}
1015
1016
1017 // CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
1018 // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1019 // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1020 // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1021 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1022 // CHECK: ret <8 x i8> [[VBSL2_I]]
// vbsl_s8 (bitwise select) expands to (v1 & v2) | (~v1 & v3) — see CHECK lines above.
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}
1026
1027 // CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
1028 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1029 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1030 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1031 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1032 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1033 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1034 // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
1035 // CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
1036 // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
1037 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1038 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
1039 // CHECK: ret <8 x i8> [[TMP4]]
// vbsl_s16 expands to and/xor/and/or on <4 x i16>; NOTE(review): this test
// deliberately returns int8x8_t, so the result is bitcast back to <8 x i8>
// (see CHECK lines above).
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vbsl_s16(v1, v2, v3);
}
1043
1044 // CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
1045 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1046 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1047 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
1048 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1049 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1050 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1051 // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
1052 // CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
1053 // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
1054 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1055 // CHECK: ret <2 x i32> [[VBSL5_I]]
// vbsl_s32 expands to (v1 & v2) | (~v1 & v3) on <2 x i32> (see CHECK lines above).
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}
1059
1060 // CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
1061 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1062 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1063 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1064 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1065 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1066 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1067 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
1068 // CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
1069 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1070 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1071 // CHECK: ret <1 x i64> [[VBSL5_I]]
// vbsl_s64 expands to (v1 & v2) | (~v1 & v3) on <1 x i64>; NOTE(review): the
// test uses uint64x1_t operands with the _s64 intrinsic (see CHECK lines above).
uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}
1075
1076 // CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
1077 // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1078 // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1079 // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1080 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1081 // CHECK: ret <8 x i8> [[VBSL2_I]]
// vbsl_u8 expands to (v1 & v2) | (~v1 & v3) on <8 x i8> (see CHECK lines above).
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}
1085
1086 // CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
1087 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1088 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1089 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1090 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1091 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1092 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1093 // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
1094 // CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
1095 // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
1096 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1097 // CHECK: ret <4 x i16> [[VBSL5_I]]
// vbsl_u16 expands to (v1 & v2) | (~v1 & v3) on <4 x i16> (see CHECK lines above).
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}
1101
1102 // CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
1103 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1104 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1105 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
1106 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1107 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1108 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1109 // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
1110 // CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
1111 // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
1112 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1113 // CHECK: ret <2 x i32> [[VBSL5_I]]
// vbsl_u32 expands to (v1 & v2) | (~v1 & v3) on <2 x i32> (see CHECK lines above).
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}
1117
1118 // CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
1119 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1120 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1121 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1122 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1123 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1124 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1125 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
1126 // CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
1127 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1128 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1129 // CHECK: ret <1 x i64> [[VBSL5_I]]
// vbsl_u64 expands to (v1 & v2) | (~v1 & v3) on <1 x i64> (see CHECK lines above).
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}
1133
1134 // CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
1135 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
1136 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
1137 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1138 // CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
1139 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1140 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1141 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
1142 // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
1143 // CHECK: [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
1144 // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
1145 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1146 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
1147 // CHECK: ret <2 x float> [[TMP5]]
// vbsl_f32 performs the select in the <2 x i32> integer domain, then bitcasts
// the result back to <2 x float> (see CHECK lines above).
float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}
1151
1152 // CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 {
1153 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1154 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
1155 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
1156 // CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1157 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1158 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1159 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
1160 // CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
1161 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1162 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1163 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
1164 // CHECK: ret <1 x double> [[TMP4]]
test_vbsl_f64(uint64x1_t v1,float64x1_t v2,float64x1_t v3)1165 float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
1166 return vbsl_f64(v1, v2, v3);
1167 }
1168
// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
// vbsl on poly8: 8-bit elements need no bitcasts, so the and/xor/or select
// is emitted directly on the <8 x i8> arguments.
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
// vbsl on poly16: wider-than-byte elements round-trip through <8 x i8>
// before the select logic, as the intrinsic lowers via generic vector casts.
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}
1194
// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
// Quad-register vbsl, signed 8-bit: select is emitted directly on <16 x i8>.
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
// Signed 16-bit lanes: operands round-trip through <16 x i8> around the select.
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
// Signed 32-bit lanes, same pattern.
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
// Signed 64-bit lanes, same pattern.
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}
1252
// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
// Quad-register vbsl, unsigned 8-bit: select emitted directly on <16 x i8>.
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
// Unsigned 16-bit lanes: operands round-trip through <16 x i8> around the select.
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}
1278
1279 // CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
1280 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1281 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1282 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1283 // CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1284 // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1285 // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1286 // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
1287 // CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
1288 // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1289 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1290 // CHECK: ret <4 x i32> [[VBSL5_I]]
test_vbslq_u32(uint32x4_t v1,int32x4_t v2,int32x4_t v3)1291 int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
1292 return vbslq_s32(v1, v2, v3);
1293 }
1294
// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
// Quad-register vbsl, unsigned 64-bit lanes: (v1 & v2) | (~v1 & v3).
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}
1310
// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP4]]
// Quad-register vbsl on float32: the select logic runs on <4 x i32> and the
// result is bitcast back to <4 x float>.
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
// Quad-register vbsl on poly8: select emitted directly on <16 x i8>.
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
// Quad-register vbsl on poly16.
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP4]]
// Quad-register vbsl on float64: select runs on <2 x i64>, result cast back.
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}
1370
// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
// vrecps (floating-point reciprocal step) lowers to the
// llvm.aarch64.neon.frecps intrinsic; the surrounding bitcasts come from the
// generic arm_neon.h argument/result marshalling.
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
// Quad-register variant, v4f32.
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP2]]
// Quad-register variant, v2f64 (AArch64-only double-precision form).
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}
1409
// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
// vrsqrts (floating-point reciprocal square-root step) lowers to the
// llvm.aarch64.neon.frsqrts intrinsic.
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
// Quad-register variant, v4f32.
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP2]]
// Quad-register variant, v2f64 (AArch64-only double-precision form).
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}
1448
// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
// vcage (absolute compare greater-or-equal) lowers to the
// llvm.aarch64.neon.facge intrinsic with operands in source order.
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCAGE_V2_I]]
// Single-lane double variant (AArch64-only), facge.v1i64.v1f64.
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
  return vcage_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
// Quad-register variant, facge.v4i32.v4f32.
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
  return vcageq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
// Quad-register double variant, facge.v2i64.v2f64.
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}
1492
// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
// vcagt (absolute compare greater-than) lowers to the
// llvm.aarch64.neon.facgt intrinsic with operands in source order.
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCAGT_V2_I]]
// Single-lane double variant (AArch64-only), facgt.v1i64.v1f64.
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
// Quad-register variant, facgt.v4i32.v4f32.
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
// Quad-register double variant, facgt.v2i64.v2f64.
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}
1536
// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCALE_V2_I]]
// vcale has no dedicated instruction: |v1| <= |v2| is implemented as
// facge with the operands swapped (note TMP1/v2 is the first facge operand).
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCALE_V2_I]]
// Single-lane double variant, operands likewise swapped into facge.
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
// Quad-register variant, swapped operands into facge.v4i32.v4f32.
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
// Quad-register double variant, swapped operands into facge.v2i64.v2f64.
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1583
// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCALT_V2_I]]
// vcalt has no dedicated instruction: |v1| < |v2| is implemented as
// facgt with the operands swapped (note TMP1/v2 is the first facgt operand).
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCALT_V2_I]]
// Single-lane double variant, operands likewise swapped into facgt.
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
// Quad-register variant, swapped operands into facgt.v4i32.v4f32.
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
// Quad-register double variant, swapped operands into facgt.v2i64.v2f64.
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1630
// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
// vtst (bitwise test): lane = all-ones if (v1 & v2) != 0, else zero;
// the i1 compare result is sign-extended to the lane width.
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
// 16-bit lanes: operands round-trip through <8 x i8> before the test.
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
// 32-bit lanes, same pattern.
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}
1665
1666 // CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1667 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1668 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1669 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1670 // CHECK: ret <8 x i8> [[VTST_I]]
test_vtst_u8(uint8x8_t v1,uint8x8_t v2)1671 uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
1672 return vtst_u8(v1, v2);
1673 }
1674
1675 // CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1676 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1677 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1678 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1679 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1680 // CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
1681 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
1682 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
1683 // CHECK: ret <4 x i16> [[VTST_I]]
test_vtst_u16(uint16x4_t v1,uint16x4_t v2)1684 uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
1685 return vtst_u16(v1, v2);
1686 }
1687
1688 // CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
1689 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1690 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1691 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1692 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1693 // CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
1694 // CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
1695 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
1696 // CHECK: ret <2 x i32> [[VTST_I]]
test_vtst_u32(uint32x2_t v1,uint32x2_t v2)1697 uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
1698 return vtst_u32(v1, v2);
1699 }
1700
1701 // CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1702 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1703 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1704 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1705 // CHECK: ret <16 x i8> [[VTST_I]]
test_vtstq_s8(int8x16_t v1,int8x16_t v2)1706 uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
1707 return vtstq_s8(v1, v2);
1708 }
1709
1710 // CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1711 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1712 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1713 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1714 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1715 // CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
1716 // CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
1717 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
1718 // CHECK: ret <8 x i16> [[VTST_I]]
test_vtstq_s16(int16x8_t v1,int16x8_t v2)1719 uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
1720 return vtstq_s16(v1, v2);
1721 }
1722
1723 // CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
1724 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1725 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1726 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1727 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1728 // CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
1729 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
1730 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
1731 // CHECK: ret <4 x i32> [[VTST_I]]
test_vtstq_s32(int32x4_t v1,int32x4_t v2)1732 uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
1733 return vtstq_s32(v1, v2);
1734 }
1735
1736 // CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1737 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1738 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1739 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1740 // CHECK: ret <16 x i8> [[VTST_I]]
test_vtstq_u8(uint8x16_t v1,uint8x16_t v2)1741 uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
1742 return vtstq_u8(v1, v2);
1743 }
1744
1745 // CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1746 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1747 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1748 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1749 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1750 // CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
1751 // CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
1752 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
1753 // CHECK: ret <8 x i16> [[VTST_I]]
test_vtstq_u16(uint16x8_t v1,uint16x8_t v2)1754 uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
1755 return vtstq_u16(v1, v2);
1756 }
1757
1758 // CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
1759 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1760 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1761 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1762 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1763 // CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
1764 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
1765 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
1766 // CHECK: ret <4 x i32> [[VTST_I]]
test_vtstq_u32(uint32x4_t v1,uint32x4_t v2)1767 uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
1768 return vtstq_u32(v1, v2);
1769 }
1770
1771 // CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
1772 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1773 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1774 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1775 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1776 // CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
1777 // CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
1778 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
1779 // CHECK: ret <2 x i64> [[VTST_I]]
test_vtstq_s64(int64x2_t v1,int64x2_t v2)1780 uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
1781 return vtstq_s64(v1, v2);
1782 }
1783
1784 // CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
1785 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1786 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1787 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1788 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1789 // CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
1790 // CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
1791 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
1792 // CHECK: ret <2 x i64> [[VTST_I]]
test_vtstq_u64(uint64x2_t v1,uint64x2_t v2)1793 uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
1794 return vtstq_u64(v1, v2);
1795 }
1796
1797 // CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1798 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1799 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1800 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1801 // CHECK: ret <8 x i8> [[VTST_I]]
test_vtst_p8(poly8x8_t v1,poly8x8_t v2)1802 uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
1803 return vtst_p8(v1, v2);
1804 }
1805
1806 // CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1807 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1808 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1809 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1810 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1811 // CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
1812 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
1813 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
1814 // CHECK: ret <4 x i16> [[VTST_I]]
test_vtst_p16(poly16x4_t v1,poly16x4_t v2)1815 uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
1816 return vtst_p16(v1, v2);
1817 }
1818
1819 // CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1820 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1821 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1822 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1823 // CHECK: ret <16 x i8> [[VTST_I]]
test_vtstq_p8(poly8x16_t v1,poly8x16_t v2)1824 uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
1825 return vtstq_p8(v1, v2);
1826 }
1827
1828 // CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1829 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1830 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1831 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1832 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1833 // CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
1834 // CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
1835 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
1836 // CHECK: ret <8 x i16> [[VTST_I]]
test_vtstq_p16(poly16x8_t v1,poly16x8_t v2)1837 uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
1838 return vtstq_p16(v1, v2);
1839 }
1840
1841 // CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 {
1842 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1843 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1844 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1845 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1846 // CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
1847 // CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
1848 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
1849 // CHECK: ret <1 x i64> [[VTST_I]]
test_vtst_s64(int64x1_t a,int64x1_t b)1850 uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
1851 return vtst_s64(a, b);
1852 }
1853
1854 // CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 {
1855 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1856 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1857 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1858 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1859 // CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
1860 // CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
1861 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
1862 // CHECK: ret <1 x i64> [[VTST_I]]
test_vtst_u64(uint64x1_t a,uint64x1_t b)1863 uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
1864 return vtst_u64(a, b);
1865 }
1866
1867 // CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1868 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1869 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1870 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vceq_s8(int8x8_t v1,int8x8_t v2)1871 uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
1872 return vceq_s8(v1, v2);
1873 }
1874
1875 // CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1876 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1877 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1878 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vceq_s16(int16x4_t v1,int16x4_t v2)1879 uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
1880 return vceq_s16(v1, v2);
1881 }
1882
1883 // CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
1884 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1885 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1886 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vceq_s32(int32x2_t v1,int32x2_t v2)1887 uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
1888 return vceq_s32(v1, v2);
1889 }
1890
1891 // CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
1892 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1893 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1894 // CHECK: ret <1 x i64> [[SEXT_I]]
test_vceq_s64(int64x1_t a,int64x1_t b)1895 uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
1896 return vceq_s64(a, b);
1897 }
1898
1899 // CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
1900 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1901 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1902 // CHECK: ret <1 x i64> [[SEXT_I]]
test_vceq_u64(uint64x1_t a,uint64x1_t b)1903 uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
1904 return vceq_u64(a, b);
1905 }
1906
1907 // CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1908 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
1909 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1910 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vceq_f32(float32x2_t v1,float32x2_t v2)1911 uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
1912 return vceq_f32(v1, v2);
1913 }
1914
1915 // CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
1916 // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
1917 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1918 // CHECK: ret <1 x i64> [[SEXT_I]]
test_vceq_f64(float64x1_t a,float64x1_t b)1919 uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
1920 return vceq_f64(a, b);
1921 }
1922
1923 // CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1924 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1925 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1926 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vceq_u8(uint8x8_t v1,uint8x8_t v2)1927 uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
1928 return vceq_u8(v1, v2);
1929 }
1930
1931 // CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1932 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1933 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1934 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vceq_u16(uint16x4_t v1,uint16x4_t v2)1935 uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
1936 return vceq_u16(v1, v2);
1937 }
1938
1939 // CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
1940 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1941 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1942 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vceq_u32(uint32x2_t v1,uint32x2_t v2)1943 uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
1944 return vceq_u32(v1, v2);
1945 }
1946
1947 // CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1948 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1949 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1950 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vceq_p8(poly8x8_t v1,poly8x8_t v2)1951 uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
1952 return vceq_p8(v1, v2);
1953 }
1954
1955 // CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1956 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1957 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1958 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vceqq_s8(int8x16_t v1,int8x16_t v2)1959 uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
1960 return vceqq_s8(v1, v2);
1961 }
1962
1963 // CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1964 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1965 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1966 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vceqq_s16(int16x8_t v1,int16x8_t v2)1967 uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
1968 return vceqq_s16(v1, v2);
1969 }
1970
1971 // CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
1972 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1973 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1974 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vceqq_s32(int32x4_t v1,int32x4_t v2)1975 uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
1976 return vceqq_s32(v1, v2);
1977 }
1978
1979 // CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1980 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
1981 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1982 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vceqq_f32(float32x4_t v1,float32x4_t v2)1983 uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
1984 return vceqq_f32(v1, v2);
1985 }
1986
1987 // CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1988 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1989 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1990 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vceqq_u8(uint8x16_t v1,uint8x16_t v2)1991 uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
1992 return vceqq_u8(v1, v2);
1993 }
1994
1995 // CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1996 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1997 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1998 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vceqq_u16(uint16x8_t v1,uint16x8_t v2)1999 uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
2000 return vceqq_u16(v1, v2);
2001 }
2002
2003 // CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2004 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
2005 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2006 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vceqq_u32(uint32x4_t v1,uint32x4_t v2)2007 uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
2008 return vceqq_u32(v1, v2);
2009 }
2010
2011 // CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2012 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
2013 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2014 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vceqq_p8(poly8x16_t v1,poly8x16_t v2)2015 uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
2016 return vceqq_p8(v1, v2);
2017 }
2018
2019
2020 // CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2021 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
2022 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2023 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vceqq_s64(int64x2_t v1,int64x2_t v2)2024 uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
2025 return vceqq_s64(v1, v2);
2026 }
2027
2028 // CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2029 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
2030 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2031 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vceqq_u64(uint64x2_t v1,uint64x2_t v2)2032 uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
2033 return vceqq_u64(v1, v2);
2034 }
2035
2036 // CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
2037 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
2038 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2039 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vceqq_f64(float64x2_t v1,float64x2_t v2)2040 uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
2041 return vceqq_f64(v1, v2);
2042 }
2043 // CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2044 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
2045 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2046 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcge_s8(int8x8_t v1,int8x8_t v2)2047 uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
2048 return vcge_s8(v1, v2);
2049 }
2050
2051 // CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2052 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
2053 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2054 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcge_s16(int16x4_t v1,int16x4_t v2)2055 uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
2056 return vcge_s16(v1, v2);
2057 }
2058
2059 // CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2060 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
2061 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2062 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcge_s32(int32x2_t v1,int32x2_t v2)2063 uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
2064 return vcge_s32(v1, v2);
2065 }
2066
2067 // CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
2068 // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
2069 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2070 // CHECK: ret <1 x i64> [[SEXT_I]]
test_vcge_s64(int64x1_t a,int64x1_t b)2071 uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
2072 return vcge_s64(a, b);
2073 }
2074
2075 // CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
2076 // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
2077 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2078 // CHECK: ret <1 x i64> [[SEXT_I]]
test_vcge_u64(uint64x1_t a,uint64x1_t b)2079 uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
2080 return vcge_u64(a, b);
2081 }
2082
2083 // CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 {
2084 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
2085 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2086 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcge_f32(float32x2_t v1,float32x2_t v2)2087 uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
2088 return vcge_f32(v1, v2);
2089 }
2090
2091 // CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
2092 // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
2093 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2094 // CHECK: ret <1 x i64> [[SEXT_I]]
test_vcge_f64(float64x1_t a,float64x1_t b)2095 uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
2096 return vcge_f64(a, b);
2097 }
2098
2099 // CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2100 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
2101 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2102 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcge_u8(uint8x8_t v1,uint8x8_t v2)2103 uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
2104 return vcge_u8(v1, v2);
2105 }
2106
2107 // CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2108 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
2109 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2110 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcge_u16(uint16x4_t v1,uint16x4_t v2)2111 uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
2112 return vcge_u16(v1, v2);
2113 }
2114
2115 // CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2116 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
2117 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2118 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcge_u32(uint32x2_t v1,uint32x2_t v2)2119 uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
2120 return vcge_u32(v1, v2);
2121 }
2122
2123 // CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2124 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
2125 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2126 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcgeq_s8(int8x16_t v1,int8x16_t v2)2127 uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
2128 return vcgeq_s8(v1, v2);
2129 }
2130
2131 // CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2132 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
2133 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2134 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcgeq_s16(int16x8_t v1,int16x8_t v2)2135 uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
2136 return vcgeq_s16(v1, v2);
2137 }
2138
2139 // CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2140 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
2141 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2142 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgeq_s32(int32x4_t v1,int32x4_t v2)2143 uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
2144 return vcgeq_s32(v1, v2);
2145 }
2146
2147 // CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
2148 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
2149 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2150 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgeq_f32(float32x4_t v1,float32x4_t v2)2151 uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
2152 return vcgeq_f32(v1, v2);
2153 }
2154
2155 // CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2156 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
2157 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2158 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcgeq_u8(uint8x16_t v1,uint8x16_t v2)2159 uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
2160 return vcgeq_u8(v1, v2);
2161 }
2162
2163 // CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2164 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
2165 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2166 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcgeq_u16(uint16x8_t v1,uint16x8_t v2)2167 uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
2168 return vcgeq_u16(v1, v2);
2169 }
2170
2171 // CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2172 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
2173 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2174 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgeq_u32(uint32x4_t v1,uint32x4_t v2)2175 uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
2176 return vcgeq_u32(v1, v2);
2177 }
2178
2179 // CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2180 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
2181 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2182 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vcgeq_s64(int64x2_t v1,int64x2_t v2)2183 uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
2184 return vcgeq_s64(v1, v2);
2185 }
2186
2187 // CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2188 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2189 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2190 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vcgeq_u64(uint64x2_t v1,uint64x2_t v2)2191 uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
2192 return vcgeq_u64(v1, v2);
2193 }
2194
2195 // CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
2196 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2197 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2198 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vcgeq_f64(float64x2_t v1,float64x2_t v2)2199 uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
2200 return vcgeq_f64(v1, v2);
2201 }
2202
2203 // Notes about vcle:
2204 // LE condition predicate implemented as GE, so check reversed operands.
2205 // Using registers other than v0, v1 are possible, but would be odd.
2206 // CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2207 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
2208 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2209 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcle_s8(int8x8_t v1,int8x8_t v2)2210 uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
2211 return vcle_s8(v1, v2);
2212 }
2213
2214 // CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2215 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
2216 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2217 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcle_s16(int16x4_t v1,int16x4_t v2)2218 uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
2219 return vcle_s16(v1, v2);
2220 }
2221
// vcle (64-bit D-register forms): lane-wise "less than or equal" compare.
// Signed integer lanes lower to `icmp sle`, unsigned to `icmp ule`, and
// floating point to `fcmp ole`; the <N x i1> compare result is sign-extended
// into an all-ones (true) / all-zeros (false) mask at the lane width.

// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}
2285
// vcleq (128-bit Q-register forms): same "less than or equal" lowering as the
// D-register variants above (icmp sle/ule, fcmp ole, then sext to lane mask),
// just at full 128-bit vector widths.

// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2365
2366
// vcgt (64-bit D-register forms): lane-wise "greater than" compare.
// Signed lanes lower to `icmp sgt`, unsigned to `icmp ugt`, floating point to
// `fcmp ogt`; the i1 result is sign-extended to a per-lane all-ones/all-zeros
// mask.

// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}
2446
// vcgtq (128-bit Q-register forms): "greater than" compare at full vector
// width; same icmp sgt/ugt, fcmp ogt + sext-to-mask lowering as the D-register
// variants above.

// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2526
2527
// Notes about vclt:
// At the IR level checked here, vclt lowers to a plain icmp/fcmp "less than"
// with operands in source order. The LT-implemented-as-GT operand swap (if
// any) happens later, during instruction selection, so it would only be
// visible in assembly-level checks, not in these IR checks.
// Using registers other than v0, v1 are possible, but would be odd.
2531
// vclt (64-bit D-register forms): lane-wise "less than" compare.
// Signed lanes lower to `icmp slt`, unsigned to `icmp ult`, floating point to
// `fcmp olt`; the i1 result is sign-extended to a per-lane mask.

// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}
2611
// vcltq (128-bit Q-register forms): "less than" compare at full vector width;
// same icmp slt/ult, fcmp olt + sext-to-mask lowering as the D-register
// variants above.

// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2691
2692
// vhadd (64-bit D-register forms): halving add, lowered to the
// llvm.aarch64.neon.shadd (signed) / uhadd (unsigned) intrinsics.
// The 8-bit variants call the intrinsic directly; the 16/32-bit variants are
// checked through the <8 x i8> bitcasts that clang's NEON builtin lowering
// emits around the call.

// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}
2758
// vhaddq (128-bit Q-register forms): halving add at full vector width, via
// llvm.aarch64.neon.shadd/uhadd; non-i8 element types are checked through the
// <16 x i8> bitcasts emitted by the builtin lowering, as in the D-register
// variants above.

// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}
2824
2825
2826 // CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2827 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
2828 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
test_vhsub_s8(int8x8_t v1,int8x8_t v2)2829 int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
2830 return vhsub_s8(v1, v2);
2831 }
2832
2833 // CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2834 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2835 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2836 // CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2837 // CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2838 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
2839 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
2840 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
2841 // CHECK: ret <4 x i16> [[TMP2]]
test_vhsub_s16(int16x4_t v1,int16x4_t v2)2842 int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
2843 return vhsub_s16(v1, v2);
2844 }
2845
2846 // CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2847 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2848 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2849 // CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2850 // CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2851 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
2852 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
2853 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
2854 // CHECK: ret <2 x i32> [[TMP2]]
test_vhsub_s32(int32x2_t v1,int32x2_t v2)2855 int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
2856 return vhsub_s32(v1, v2);
2857 }
2858
2859 // CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2860 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
2861 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uhsub.v8i8 (unsigned halving subtract).
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}
2865
2866 // CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2867 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2868 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2869 // CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2870 // CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2871 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
2872 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
2873 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
2874 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uhsub.v4i16 (unsigned halving subtract).
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}
2878
2879 // CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2880 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2881 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2882 // CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2883 // CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2884 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
2885 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
2886 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
2887 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uhsub.v2i32 (unsigned halving subtract).
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2891
2892 // CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2893 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
2894 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.shsub.v16i8 (128-bit signed halving subtract).
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}
2898
2899 // CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2900 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2901 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2902 // CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
2903 // CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2904 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
2905 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
2906 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
2907 // CHECK: ret <8 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.shsub.v8i16 (128-bit signed halving subtract).
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}
2911
2912 // CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2913 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2914 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2915 // CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2916 // CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2917 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
2918 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
2919 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
2920 // CHECK: ret <4 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.shsub.v4i32 (128-bit signed halving subtract).
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}
2924
2925 // CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2926 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
2927 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uhsub.v16i8 (128-bit unsigned halving subtract).
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}
2931
2932 // CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2933 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2934 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2935 // CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
2936 // CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2937 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
2938 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
2939 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
2940 // CHECK: ret <8 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uhsub.v8i16 (128-bit unsigned halving subtract).
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}
2944
2945 // CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2946 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2947 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2948 // CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2949 // CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2950 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
2951 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
2952 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
2953 // CHECK: ret <4 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uhsub.v4i32 (128-bit unsigned halving subtract).
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2957
2958
2959 // CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2960 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
2961 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.srhadd.v8i8 (signed rounding halving add).
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}
2965
2966 // CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2967 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2968 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2969 // CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2970 // CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2971 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
2972 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2973 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
2974 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.srhadd.v4i16 (signed rounding halving add).
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}
2978
2979 // CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2980 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2981 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2982 // CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2983 // CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2984 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
2985 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2986 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
2987 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.srhadd.v2i32 (signed rounding halving add).
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}
2991
2992 // CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2993 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
2994 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.urhadd.v8i8 (unsigned rounding halving add).
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}
2998
2999 // CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
3000 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
3001 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
3002 // CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3003 // CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3004 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
3005 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
3006 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
3007 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.urhadd.v4i16 (unsigned rounding halving add).
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}
3011
3012 // CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
3013 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
3014 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
3015 // CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3016 // CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3017 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
3018 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
3019 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
3020 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.urhadd.v2i32 (unsigned rounding halving add).
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}
3024
3025 // CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
3026 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
3027 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.srhadd.v16i8 (128-bit signed rounding halving add).
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}
3031
3032 // CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
3033 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
3034 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
3035 // CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3036 // CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3037 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
3038 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
3039 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
3040 // CHECK: ret <8 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.srhadd.v8i16 (128-bit signed rounding halving add).
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}
3044
3045 // CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
3046 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
3047 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
3048 // CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3049 // CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3050 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
3051 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
3052 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
3053 // CHECK: ret <4 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.srhadd.v4i32 (128-bit signed rounding halving add).
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}
3057
3058 // CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
3059 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
3060 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.urhadd.v16i8 (128-bit unsigned rounding halving add).
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}
3064
3065 // CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
3066 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
3067 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
3068 // CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3069 // CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3070 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
3071 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
3072 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
3073 // CHECK: ret <8 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.urhadd.v8i16 (128-bit unsigned rounding halving add).
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}
3077
3078 // CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
3079 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
3080 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
3081 // CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3082 // CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3083 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
3084 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
3085 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
3086 // CHECK: ret <4 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.urhadd.v4i32 (128-bit unsigned rounding halving add).
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}
3090 // CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
3091 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3092 // CHECK: ret <8 x i8> [[VQADD_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v8i8 (signed saturating add).
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}
3096
3097 // CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
3098 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3099 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3100 // CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3101 // CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3102 // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
3103 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
3104 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
3105 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v4i16 (signed saturating add).
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}
3109
3110 // CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
3111 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3112 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3113 // CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3114 // CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3115 // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
3116 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
3117 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
3118 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v2i32 (signed saturating add).
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}
3122
3123 // CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
3124 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3125 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3126 // CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3127 // CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3128 // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
3129 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
3130 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
3131 // CHECK: ret <1 x i64> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v1i64 (signed saturating add).
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}
3135
3136 // CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
3137 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3138 // CHECK: ret <8 x i8> [[VQADD_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v8i8 (unsigned saturating add).
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}
3142
3143 // CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
3144 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3145 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3146 // CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3147 // CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3148 // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
3149 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
3150 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
3151 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v4i16 (unsigned saturating add).
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}
3155
3156 // CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
3157 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3158 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3159 // CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3160 // CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3161 // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
3162 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
3163 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
3164 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v2i32 (unsigned saturating add).
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}
3168
3169 // CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3170 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3171 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3172 // CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3173 // CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3174 // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
3175 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
3176 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
3177 // CHECK: ret <1 x i64> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v1i64 (unsigned saturating add).
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}
3181
3182 // CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
3183 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3184 // CHECK: ret <16 x i8> [[VQADDQ_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v16i8 (128-bit signed saturating add).
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}
3188
3189 // CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
3190 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3191 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3192 // CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3193 // CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3194 // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
3195 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
3196 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
3197 // CHECK: ret <8 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v8i16 (128-bit signed saturating add).
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}
3201
3202 // CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
3203 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3204 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3205 // CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3206 // CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3207 // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
3208 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
3209 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
3210 // CHECK: ret <4 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v4i32 (128-bit signed saturating add).
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}
3214
3215 // CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
3216 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3217 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3218 // CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3219 // CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3220 // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
3221 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
3222 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
3223 // CHECK: ret <2 x i64> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqadd.v2i64 (128-bit signed saturating add).
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}
3227
3228 // CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
3229 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3230 // CHECK: ret <16 x i8> [[VQADDQ_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v16i8 (128-bit unsigned saturating add).
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}
3234
3235 // CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
3236 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3237 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3238 // CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3239 // CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3240 // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
3241 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
3242 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
3243 // CHECK: ret <8 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v8i16 (128-bit unsigned saturating add).
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}
3247
3248 // CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
3249 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3250 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3251 // CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3252 // CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3253 // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
3254 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
3255 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
3256 // CHECK: ret <4 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v4i32 (128-bit unsigned saturating add).
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}
3260
3261 // CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
3262 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3263 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3264 // CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3265 // CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3266 // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
3267 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
3268 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
3269 // CHECK: ret <2 x i64> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqadd.v2i64 (128-bit unsigned saturating add).
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
3273
3274
3275 // CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
3276 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3277 // CHECK: ret <8 x i8> [[VQSUB_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqsub.v8i8 (signed saturating subtract).
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}
3281
3282 // CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
3283 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285 // CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3286 // CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3287 // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
3288 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
3289 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
3290 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqsub.v4i16 (signed saturating subtract).
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}
3294
3295 // CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
3296 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3297 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3298 // CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3299 // CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3300 // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
3301 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
3302 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
3303 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqsub.v2i32 (signed saturating subtract).
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}
3307
3308 // CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
3309 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3310 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3311 // CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3312 // CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3313 // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
3314 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
3315 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
3316 // CHECK: ret <1 x i64> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.sqsub.v1i64 (signed saturating subtract).
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}
3320
3321 // CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
3322 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3323 // CHECK: ret <8 x i8> [[VQSUB_V_I]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqsub.v8i8 (unsigned saturating subtract).
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}
3327
3328 // CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
3329 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3330 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3331 // CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3332 // CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3333 // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
3334 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
3335 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
3336 // CHECK: ret <4 x i16> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqsub.v4i16 (unsigned saturating subtract).
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}
3340
3341 // CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
3342 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3343 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3344 // CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3345 // CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3346 // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
3347 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
3348 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
3349 // CHECK: ret <2 x i32> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqsub.v2i32 (unsigned saturating subtract).
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}
3353
3354 // CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3355 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3356 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3357 // CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3358 // CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3359 // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
3360 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
3361 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
3362 // CHECK: ret <1 x i64> [[TMP2]]
// CHECK lines above pin lowering to @llvm.aarch64.neon.uqsub.v1i64 (unsigned saturating subtract).
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}
3366
3367 // CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
3368 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3369 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
test_vqsubq_s8(int8x16_t a,int8x16_t b)3370 int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
3371 return vqsubq_s8(a, b);
3372 }
3373
3374 // CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
3375 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3376 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3377 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3378 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3379 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
3380 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
3381 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
3382 // CHECK: ret <8 x i16> [[TMP2]]
test_vqsubq_s16(int16x8_t a,int16x8_t b)3383 int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
3384 return vqsubq_s16(a, b);
3385 }
3386
3387 // CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
3388 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3389 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3390 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3391 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3392 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
3393 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
3394 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
3395 // CHECK: ret <4 x i32> [[TMP2]]
test_vqsubq_s32(int32x4_t a,int32x4_t b)3396 int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
3397 return vqsubq_s32(a, b);
3398 }
3399
3400 // CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
3401 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3402 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3403 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3404 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3405 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
3406 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
3407 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
3408 // CHECK: ret <2 x i64> [[TMP2]]
test_vqsubq_s64(int64x2_t a,int64x2_t b)3409 int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
3410 return vqsubq_s64(a, b);
3411 }
3412
3413 // CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
3414 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3415 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
test_vqsubq_u8(uint8x16_t a,uint8x16_t b)3416 uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
3417 return vqsubq_u8(a, b);
3418 }
3419
3420 // CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
3421 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3422 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3423 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3424 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3425 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
3426 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
3427 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
3428 // CHECK: ret <8 x i16> [[TMP2]]
test_vqsubq_u16(uint16x8_t a,uint16x8_t b)3429 uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
3430 return vqsubq_u16(a, b);
3431 }
3432
3433 // CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
3434 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3435 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3436 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3437 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3438 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
3439 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
3440 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
3441 // CHECK: ret <4 x i32> [[TMP2]]
test_vqsubq_u32(uint32x4_t a,uint32x4_t b)3442 uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
3443 return vqsubq_u32(a, b);
3444 }
3445
3446 // CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
3447 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3448 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3449 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3450 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3451 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
3452 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
3453 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
3454 // CHECK: ret <2 x i64> [[TMP2]]
test_vqsubq_u64(uint64x2_t a,uint64x2_t b)3455 uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
3456 return vqsubq_u64(a, b);
3457 }
3458
3459
// vshl/vshlq family: per-lane variable shift, lowered to
// llvm.aarch64.neon.sshl / ushl. Note the unsigned variants still take a
// *signed* shift-count vector (int8x8_t b, etc.); only the data operand is
// unsigned. The IR signature therefore shows matching integer vector types
// for both operands.
// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// Quad-register (128-bit) vshlq variants.
// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
3643
3644
// vqshl/vqshlq family: saturating per-lane shift, lowered to
// llvm.aarch64.neon.sqshl / uqshl. Same bitcast-through-<N x i8> pattern as
// the vshl tests; the unsigned variants again take a signed count vector.
// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// Quad-register (128-bit) vqshlq variants.
// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
3828
// vrshl family (64-bit D registers): rounding per-lane shift, lowered to
// llvm.aarch64.neon.srshl / urshl. Unsigned variants take a signed count
// vector, matching the vshl/vqshl tests above.
// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}
3907
3908 // CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3909 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3910 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3911 // CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3912 // CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3913 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
3914 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3915 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
3916 // CHECK: ret <1 x i64> [[TMP2]]
test_vrshl_u64(uint64x1_t a,int64x1_t b)3917 uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
3918 return vrshl_u64(a, b);
3919 }
3920
// vrshlq: rounding shift left, 128-bit ("q" register) vectors. Same expected
// IR shape as the 64-bit variants, but the bitcast round-trips go through
// <16 x i8> and the intrinsics use the 128-bit vector suffixes.
// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
4012
4013
// vqrshl: saturating rounding shift left, 64-bit vectors. Expected to lower
// to @llvm.aarch64.neon.[su]qrshl, with the same <8 x i8> bitcast round-trip
// pattern as vrshl for non-i8 element types.
// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}
4105
// vqrshlq: saturating rounding shift left, 128-bit vectors
// (@llvm.aarch64.neon.[su]qrshl with 128-bit vector suffixes; non-i8
// element types round-trip through <16 x i8> bitcasts).
// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
4197
// vsli_n / vsliq_n on poly64 vectors: shift-left-and-insert with an
// immediate shift amount (0 here), expected to lower to
// @llvm.aarch64.neon.vsli with the amount passed as an i32 constant.
// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
4219
// vmax: element-wise maximum, 64-bit vectors. Unlike the shift tests above,
// the intrinsic result is returned directly (no bitcast back through i8);
// integer variants use [su]max, the float variant uses fmax.
// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
// CHECK: ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
4288
// vmaxq: element-wise maximum, 128-bit vectors, including the
// AArch64-only float64x2_t variant.
// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4
// CHECK: ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4
// CHECK: ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}
4368
4369
// vmin: element-wise minimum, 64-bit vectors — mirrors the vmax tests with
// @llvm.aarch64.neon.[su]min.
// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}
4427
4428 // CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
4429 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4430 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4431 // CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
4432 // CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
4433 // CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4
4434 // CHECK: ret <2 x float> [[VMIN2_I]]
test_vmin_f32(float32x2_t a,float32x2_t b)4435 float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
4436 return vmin_f32(a, b);
4437 }
4438
// vminq_* tests: 128-bit vector minimum; same pattern as vmin_* but on the
// q-register types, with wider-than-i8 elements bitcast through <16 x i8>.
// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4
// CHECK: ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
4518
// vmaxnm/vminnm tests: the "NM" floating-point max/min variants must lower
// to @llvm.aarch64.neon.fmaxnm / @llvm.aarch64.neon.fminnm (these map to the
// FMAXNM/FMINNM instructions; presumably IEEE maxNum/minNum NaN semantics —
// see the ARM ARM for the exact behavior).
// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]]) #4
// CHECK: ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4
// CHECK: ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4
// CHECK: ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]]) #4
// CHECK: ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4
// CHECK: ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4
// CHECK: ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
4584
// vpmax_* tests: pairwise maximum on 64-bit vectors; expect lowering to the
// @llvm.aarch64.neon.smaxp/umaxp/fmaxp pairwise intrinsics.
// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
// CHECK: ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
4653
// vpmaxq_* tests: 128-bit pairwise maximum; expect the smaxp/umaxp/fmaxp
// intrinsics on the q-register vector types, including the f64 form.
// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4
// CHECK: ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4
// CHECK: ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}
4733
// vpmin_* tests: pairwise minimum on 64-bit vectors; expect lowering to the
// @llvm.aarch64.neon.sminp/uminp/fminp pairwise intrinsics.
// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
// CHECK: ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}
4802
// vpminq_* tests: 128-bit pairwise minimum; expect the sminp/uminp/fminp
// intrinsics on the q-register vector types, including the f64 form.
// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4
// CHECK: ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4
// CHECK: ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}
4882
// vpmaxnm/vpminnm tests: pairwise "NM" floating-point max/min; expect
// lowering to @llvm.aarch64.neon.fmaxnmp / @llvm.aarch64.neon.fminnmp.
// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4
// CHECK: ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4
// CHECK: ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4
// CHECK: ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4
// CHECK: ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4
// CHECK: ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4
// CHECK: ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}
4948
4949 // CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
4950 // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
4951 // CHECK: ret <8 x i8> [[VPADD_V_I]]
test_vpadd_s8(int8x8_t a,int8x8_t b)4952 int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
4953 return vpadd_s8(a, b);
4954 }
4955
4956 // CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
4957 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4958 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4959 // CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4960 // CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4961 // CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
4962 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4963 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
4964 // CHECK: ret <4 x i16> [[TMP2]]
test_vpadd_s16(int16x4_t a,int16x4_t b)4965 int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
4966 return vpadd_s16(a, b);
4967 }
4968
4969 // CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
4970 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4971 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4972 // CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4973 // CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4974 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
4975 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4976 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
4977 // CHECK: ret <2 x i32> [[TMP2]]
test_vpadd_s32(int32x2_t a,int32x2_t b)4978 int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
4979 return vpadd_s32(a, b);
4980 }
4981
4982 // CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
4983 // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
4984 // CHECK: ret <8 x i8> [[VPADD_V_I]]
test_vpadd_u8(uint8x8_t a,uint8x8_t b)4985 uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
4986 return vpadd_u8(a, b);
4987 }
4988
4989 // CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
4990 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4991 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4992 // CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4993 // CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4994 // CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
4995 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4996 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
4997 // CHECK: ret <4 x i16> [[TMP2]]
test_vpadd_u16(uint16x4_t a,uint16x4_t b)4998 uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
4999 return vpadd_u16(a, b);
5000 }
5001
5002 // CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
5003 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5004 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5005 // CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5006 // CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5007 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
5008 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
5009 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
5010 // CHECK: ret <2 x i32> [[TMP2]]
test_vpadd_u32(uint32x2_t a,uint32x2_t b)5011 uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
5012 return vpadd_u32(a, b);
5013 }
5014
5015 // CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
5016 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
5017 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
5018 // CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
5019 // CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
5020 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
5021 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
5022 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
5023 // CHECK: ret <2 x float> [[TMP2]]
test_vpadd_f32(float32x2_t a,float32x2_t b)5024 float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
5025 return vpadd_f32(a, b);
5026 }
5027
5028 // CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
5029 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
5030 // CHECK: ret <16 x i8> [[VPADDQ_V_I]]
test_vpaddq_s8(int8x16_t a,int8x16_t b)5031 int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
5032 return vpaddq_s8(a, b);
5033 }
5034
5035 // CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5036 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5037 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5038 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5039 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5040 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
5041 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
5042 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
5043 // CHECK: ret <8 x i16> [[TMP2]]
test_vpaddq_s16(int16x8_t a,int16x8_t b)5044 int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
5045 return vpaddq_s16(a, b);
5046 }
5047
5048 // CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5049 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5050 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5051 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5052 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5053 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
5054 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
5055 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
5056 // CHECK: ret <4 x i32> [[TMP2]]
test_vpaddq_s32(int32x4_t a,int32x4_t b)5057 int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
5058 return vpaddq_s32(a, b);
5059 }
5060
5061 // CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
5062 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
5063 // CHECK: ret <16 x i8> [[VPADDQ_V_I]]
test_vpaddq_u8(uint8x16_t a,uint8x16_t b)5064 uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
5065 return vpaddq_u8(a, b);
5066 }
5067
5068 // CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
5069 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5070 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5071 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5072 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5073 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
5074 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
5075 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
5076 // CHECK: ret <8 x i16> [[TMP2]]
test_vpaddq_u16(uint16x8_t a,uint16x8_t b)5077 uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
5078 return vpaddq_u16(a, b);
5079 }
5080
5081 // CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
5082 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5083 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5084 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5085 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5086 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
5087 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
5088 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
5089 // CHECK: ret <4 x i32> [[TMP2]]
test_vpaddq_u32(uint32x4_t a,uint32x4_t b)5090 uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
5091 return vpaddq_u32(a, b);
5092 }
5093
5094 // CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
5095 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
5096 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
5097 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
5098 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
5099 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4
5100 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
5101 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float>
5102 // CHECK: ret <4 x float> [[TMP2]]
test_vpaddq_f32(float32x4_t a,float32x4_t b)5103 float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
5104 return vpaddq_f32(a, b);
5105 }
5106
5107 // CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 {
5108 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
5109 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
5110 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
5111 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
5112 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4
5113 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
5114 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double>
5115 // CHECK: ret <2 x double> [[TMP2]]
test_vpaddq_f64(float64x2_t a,float64x2_t b)5116 float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
5117 return vpaddq_f64(a, b);
5118 }
5119
5120 // CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
5121 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5122 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5123 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5124 // CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5125 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
5126 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
5127 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
5128 // CHECK: ret <4 x i16> [[TMP2]]
test_vqdmulh_s16(int16x4_t a,int16x4_t b)5129 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
5130 return vqdmulh_s16(a, b);
5131 }
5132
5133 // CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
5134 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5135 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5136 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5137 // CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5138 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
5139 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
5140 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
5141 // CHECK: ret <2 x i32> [[TMP2]]
test_vqdmulh_s32(int32x2_t a,int32x2_t b)5142 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
5143 return vqdmulh_s32(a, b);
5144 }
5145
5146 // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5147 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5148 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5149 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5150 // CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5151 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
5152 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
5153 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
5154 // CHECK: ret <8 x i16> [[TMP2]]
test_vqdmulhq_s16(int16x8_t a,int16x8_t b)5155 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
5156 return vqdmulhq_s16(a, b);
5157 }
5158
5159 // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5160 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5161 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5162 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5163 // CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
5165 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
5166 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
5167 // CHECK: ret <4 x i32> [[TMP2]]
test_vqdmulhq_s32(int32x4_t a,int32x4_t b)5168 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
5169 return vqdmulhq_s32(a, b);
5170 }
5171
5172 // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
5173 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5174 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5175 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5176 // CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5177 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
5178 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
5179 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
5180 // CHECK: ret <4 x i16> [[TMP2]]
test_vqrdmulh_s16(int16x4_t a,int16x4_t b)5181 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
5182 return vqrdmulh_s16(a, b);
5183 }
5184
5185 // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
5186 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5187 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5188 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5189 // CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5190 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
5191 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
5192 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
5193 // CHECK: ret <2 x i32> [[TMP2]]
test_vqrdmulh_s32(int32x2_t a,int32x2_t b)5194 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
5195 return vqrdmulh_s32(a, b);
5196 }
5197
5198 // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5199 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5200 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5201 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5202 // CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5203 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
5204 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
5205 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
5206 // CHECK: ret <8 x i16> [[TMP2]]
test_vqrdmulhq_s16(int16x8_t a,int16x8_t b)5207 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
5208 return vqrdmulhq_s16(a, b);
5209 }
5210
5211 // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5212 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5213 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5214 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5215 // CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5216 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
5217 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
5218 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
5219 // CHECK: ret <4 x i32> [[TMP2]]
test_vqrdmulhq_s32(int32x4_t a,int32x4_t b)5220 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
5221 return vqrdmulhq_s32(a, b);
5222 }
5223
5224 // CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 {
5225 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
5226 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
5227 // CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
5228 // CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
5229 // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4
5230 // CHECK: ret <2 x float> [[VMULX2_I]]
test_vmulx_f32(float32x2_t a,float32x2_t b)5231 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
5232 return vmulx_f32(a, b);
5233 }
5234
5235 // CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 {
5236 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
5237 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
5238 // CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
5239 // CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
5240 // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4
5241 // CHECK: ret <4 x float> [[VMULX2_I]]
test_vmulxq_f32(float32x4_t a,float32x4_t b)5242 float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
5243 return vmulxq_f32(a, b);
5244 }
5245
5246 // CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 {
5247 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
5248 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
5249 // CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
5250 // CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
5251 // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4
5252 // CHECK: ret <2 x double> [[VMULX2_I]]
test_vmulxq_f64(float64x2_t a,float64x2_t b)5253 float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
5254 return vmulxq_f64(a, b);
5255 }
5256
5257 // CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
5258 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5259 // CHECK: ret <8 x i8> [[VSHL_N]]
test_vshl_n_s8(int8x8_t a)5260 int8x8_t test_vshl_n_s8(int8x8_t a) {
5261 return vshl_n_s8(a, 3);
5262 }
5263
5264 // CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
5265 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5266 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5267 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
5268 // CHECK: ret <4 x i16> [[VSHL_N]]
test_vshl_n_s16(int16x4_t a)5269 int16x4_t test_vshl_n_s16(int16x4_t a) {
5270 return vshl_n_s16(a, 3);
5271 }
5272
5273 // CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
5274 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5275 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5276 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
5277 // CHECK: ret <2 x i32> [[VSHL_N]]
test_vshl_n_s32(int32x2_t a)5278 int32x2_t test_vshl_n_s32(int32x2_t a) {
5279 return vshl_n_s32(a, 3);
5280 }
5281
5282 // CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
5283 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5284 // CHECK: ret <16 x i8> [[VSHL_N]]
test_vshlq_n_s8(int8x16_t a)5285 int8x16_t test_vshlq_n_s8(int8x16_t a) {
5286 return vshlq_n_s8(a, 3);
5287 }
5288
5289 // CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
5290 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5291 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5292 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5293 // CHECK: ret <8 x i16> [[VSHL_N]]
test_vshlq_n_s16(int16x8_t a)5294 int16x8_t test_vshlq_n_s16(int16x8_t a) {
5295 return vshlq_n_s16(a, 3);
5296 }
5297
5298 // CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
5299 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5300 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5301 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
5302 // CHECK: ret <4 x i32> [[VSHL_N]]
test_vshlq_n_s32(int32x4_t a)5303 int32x4_t test_vshlq_n_s32(int32x4_t a) {
5304 return vshlq_n_s32(a, 3);
5305 }
5306
5307 // CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
5308 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5309 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5310 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
5311 // CHECK: ret <2 x i64> [[VSHL_N]]
test_vshlq_n_s64(int64x2_t a)5312 int64x2_t test_vshlq_n_s64(int64x2_t a) {
5313 return vshlq_n_s64(a, 3);
5314 }
5315
5316 // CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
5317 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5318 // CHECK: ret <8 x i8> [[VSHL_N]]
test_vshl_n_u8(int8x8_t a)5319 int8x8_t test_vshl_n_u8(int8x8_t a) {
5320 return vshl_n_u8(a, 3);
5321 }
5322
5323 // CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
5324 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5325 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5326 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
5327 // CHECK: ret <4 x i16> [[VSHL_N]]
test_vshl_n_u16(int16x4_t a)5328 int16x4_t test_vshl_n_u16(int16x4_t a) {
5329 return vshl_n_u16(a, 3);
5330 }
5331
5332 // CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
5333 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5334 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5335 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
5336 // CHECK: ret <2 x i32> [[VSHL_N]]
test_vshl_n_u32(int32x2_t a)5337 int32x2_t test_vshl_n_u32(int32x2_t a) {
5338 return vshl_n_u32(a, 3);
5339 }
5340
5341 // CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
5342 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5343 // CHECK: ret <16 x i8> [[VSHL_N]]
test_vshlq_n_u8(int8x16_t a)5344 int8x16_t test_vshlq_n_u8(int8x16_t a) {
5345 return vshlq_n_u8(a, 3);
5346 }
5347
5348 // CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
5349 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5350 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5351 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5352 // CHECK: ret <8 x i16> [[VSHL_N]]
test_vshlq_n_u16(int16x8_t a)5353 int16x8_t test_vshlq_n_u16(int16x8_t a) {
5354 return vshlq_n_u16(a, 3);
5355 }
5356
5357 // CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
5358 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5359 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5360 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
5361 // CHECK: ret <4 x i32> [[VSHL_N]]
test_vshlq_n_u32(int32x4_t a)5362 int32x4_t test_vshlq_n_u32(int32x4_t a) {
5363 return vshlq_n_u32(a, 3);
5364 }
5365
5366 // CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
5367 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5368 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5369 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
5370 // CHECK: ret <2 x i64> [[VSHL_N]]
test_vshlq_n_u64(int64x2_t a)5371 int64x2_t test_vshlq_n_u64(int64x2_t a) {
5372 return vshlq_n_u64(a, 3);
5373 }
5374
5375 // CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
5376 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5377 // CHECK: ret <8 x i8> [[VSHR_N]]
test_vshr_n_s8(int8x8_t a)5378 int8x8_t test_vshr_n_s8(int8x8_t a) {
5379 return vshr_n_s8(a, 3);
5380 }
5381
5382 // CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
5383 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5384 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5385 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
5386 // CHECK: ret <4 x i16> [[VSHR_N]]
test_vshr_n_s16(int16x4_t a)5387 int16x4_t test_vshr_n_s16(int16x4_t a) {
5388 return vshr_n_s16(a, 3);
5389 }
5390
5391 // CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
5392 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5393 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5394 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
5395 // CHECK: ret <2 x i32> [[VSHR_N]]
test_vshr_n_s32(int32x2_t a)5396 int32x2_t test_vshr_n_s32(int32x2_t a) {
5397 return vshr_n_s32(a, 3);
5398 }
5399
5400 // CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
5401 // CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5402 // CHECK: ret <16 x i8> [[VSHR_N]]
test_vshrq_n_s8(int8x16_t a)5403 int8x16_t test_vshrq_n_s8(int8x16_t a) {
5404 return vshrq_n_s8(a, 3);
5405 }
5406
5407 // CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
5408 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5409 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5410 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5411 // CHECK: ret <8 x i16> [[VSHR_N]]
test_vshrq_n_s16(int16x8_t a)5412 int16x8_t test_vshrq_n_s16(int16x8_t a) {
5413 return vshrq_n_s16(a, 3);
5414 }
5415
5416 // CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
5417 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5418 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5419 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
5420 // CHECK: ret <4 x i32> [[VSHR_N]]
test_vshrq_n_s32(int32x4_t a)5421 int32x4_t test_vshrq_n_s32(int32x4_t a) {
5422 return vshrq_n_s32(a, 3);
5423 }
5424
5425 // CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
5426 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5427 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5428 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
5429 // CHECK: ret <2 x i64> [[VSHR_N]]
test_vshrq_n_s64(int64x2_t a)5430 int64x2_t test_vshrq_n_s64(int64x2_t a) {
5431 return vshrq_n_s64(a, 3);
5432 }
5433
5434 // CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
5435 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5436 // CHECK: ret <8 x i8> [[VSHR_N]]
test_vshr_n_u8(int8x8_t a)5437 int8x8_t test_vshr_n_u8(int8x8_t a) {
5438 return vshr_n_u8(a, 3);
5439 }
5440
5441 // CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5444 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
5445 // CHECK: ret <4 x i16> [[VSHR_N]]
test_vshr_n_u16(int16x4_t a)5446 int16x4_t test_vshr_n_u16(int16x4_t a) {
5447 return vshr_n_u16(a, 3);
5448 }
5449
5450 // CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
5451 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5452 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5453 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
5454 // CHECK: ret <2 x i32> [[VSHR_N]]
test_vshr_n_u32(int32x2_t a)5455 int32x2_t test_vshr_n_u32(int32x2_t a) {
5456 return vshr_n_u32(a, 3);
5457 }
5458
5459 // CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
5460 // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5461 // CHECK: ret <16 x i8> [[VSHR_N]]
test_vshrq_n_u8(int8x16_t a)5462 int8x16_t test_vshrq_n_u8(int8x16_t a) {
5463 return vshrq_n_u8(a, 3);
5464 }
5465
5466 // CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
5467 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5468 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5469 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5470 // CHECK: ret <8 x i16> [[VSHR_N]]
test_vshrq_n_u16(int16x8_t a)5471 int16x8_t test_vshrq_n_u16(int16x8_t a) {
5472 return vshrq_n_u16(a, 3);
5473 }
5474
5475 // CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
5476 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5477 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5478 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
5479 // CHECK: ret <4 x i32> [[VSHR_N]]
test_vshrq_n_u32(int32x4_t a)5480 int32x4_t test_vshrq_n_u32(int32x4_t a) {
5481 return vshrq_n_u32(a, 3);
5482 }
5483
5484 // CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
5485 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5486 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5487 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
5488 // CHECK: ret <2 x i64> [[VSHR_N]]
test_vshrq_n_u64(int64x2_t a)5489 int64x2_t test_vshrq_n_u64(int64x2_t a) {
5490 return vshrq_n_u64(a, 3);
5491 }
5492
// Signed vsra[q]_n (SSRA, shift-right-accumulate): codegen is an 'ashr'
// of the second operand by a splat immediate, followed by an 'add' into
// the accumulator; no target intrinsic is needed.
// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}
5568
5569 // CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
5570 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5571 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
5572 // CHECK: ret <8 x i8> [[TMP0]]
test_vsra_n_u8(int8x8_t a,int8x8_t b)5573 int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
5574 return vsra_n_u8(a, b, 3);
5575 }
5576
5577 // CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
5578 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5579 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5580 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5581 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5582 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
5583 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
5584 // CHECK: ret <4 x i16> [[TMP4]]
test_vsra_n_u16(int16x4_t a,int16x4_t b)5585 int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
5586 return vsra_n_u16(a, b, 3);
5587 }
5588
5589 // CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
5590 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5591 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5592 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5593 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5594 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
5595 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
5596 // CHECK: ret <2 x i32> [[TMP4]]
test_vsra_n_u32(int32x2_t a,int32x2_t b)5597 int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
5598 return vsra_n_u32(a, b, 3);
5599 }
5600
5601 // CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
5602 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5603 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
5604 // CHECK: ret <16 x i8> [[TMP0]]
test_vsraq_n_u8(int8x16_t a,int8x16_t b)5605 int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
5606 return vsraq_n_u8(a, b, 3);
5607 }
5608
5609 // CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
5610 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5611 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5612 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5613 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5614 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5615 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
5616 // CHECK: ret <8 x i16> [[TMP4]]
test_vsraq_n_u16(int16x8_t a,int16x8_t b)5617 int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
5618 return vsraq_n_u16(a, b, 3);
5619 }
5620
5621 // CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
5622 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5623 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5624 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5625 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5626 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
5627 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
5628 // CHECK: ret <4 x i32> [[TMP4]]
test_vsraq_n_u32(int32x4_t a,int32x4_t b)5629 int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
5630 return vsraq_n_u32(a, b, 3);
5631 }
5632
5633 // CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
5634 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5635 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5636 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5637 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5638 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
5639 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
5640 // CHECK: ret <2 x i64> [[TMP4]]
test_vsraq_n_u64(int64x2_t a,int64x2_t b)5641 int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
5642 return vsraq_n_u64(a, b, 3);
5643 }
5644
// Signed vrshr[q]_n (SRSHR, rounding shift right): no IR shift exists for
// rounding shifts, so codegen calls the target intrinsic
// llvm.aarch64.neon.srshl with a negated splat shift amount (-3).
// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}
5703
5704 // CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
5705 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5706 // CHECK: ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_u8(int8x8_t a)5707 int8x8_t test_vrshr_n_u8(int8x8_t a) {
5708 return vrshr_n_u8(a, 3);
5709 }
5710
5711 // CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
5712 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5713 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5714 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5715 // CHECK: ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_u16(int16x4_t a)5716 int16x4_t test_vrshr_n_u16(int16x4_t a) {
5717 return vrshr_n_u16(a, 3);
5718 }
5719
5720 // CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
5721 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5722 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5723 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5724 // CHECK: ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_u32(int32x2_t a)5725 int32x2_t test_vrshr_n_u32(int32x2_t a) {
5726 return vrshr_n_u32(a, 3);
5727 }
5728
5729 // CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
5730 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5731 // CHECK: ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_u8(int8x16_t a)5732 int8x16_t test_vrshrq_n_u8(int8x16_t a) {
5733 return vrshrq_n_u8(a, 3);
5734 }
5735
5736 // CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
5737 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5738 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5739 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5740 // CHECK: ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_u16(int16x8_t a)5741 int16x8_t test_vrshrq_n_u16(int16x8_t a) {
5742 return vrshrq_n_u16(a, 3);
5743 }
5744
5745 // CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
5746 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5747 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5748 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5749 // CHECK: ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_u32(int32x4_t a)5750 int32x4_t test_vrshrq_n_u32(int32x4_t a) {
5751 return vrshrq_n_u32(a, 3);
5752 }
5753
5754 // CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
5755 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5756 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5757 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5758 // CHECK: ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_u64(int64x2_t a)5759 int64x2_t test_vrshrq_n_u64(int64x2_t a) {
5760 return vrshrq_n_u64(a, 3);
5761 }
5762
// Signed vrsra[q]_n (SRSRA, rounding shift-right-accumulate): the rounding
// shift is the srshl target intrinsic with a negated splat amount, and the
// accumulate is a plain IR 'add'.
// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}
5838
5839 // CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
5840 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5841 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5842 // CHECK: ret <8 x i8> [[TMP0]]
test_vrsra_n_u8(int8x8_t a,int8x8_t b)5843 int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
5844 return vrsra_n_u8(a, b, 3);
5845 }
5846
5847 // CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
5848 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5849 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5850 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5851 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5852 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5853 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5854 // CHECK: ret <4 x i16> [[TMP3]]
test_vrsra_n_u16(int16x4_t a,int16x4_t b)5855 int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
5856 return vrsra_n_u16(a, b, 3);
5857 }
5858
5859 // CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
5860 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5861 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5862 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5863 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5864 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5865 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5866 // CHECK: ret <2 x i32> [[TMP3]]
test_vrsra_n_u32(int32x2_t a,int32x2_t b)5867 int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
5868 return vrsra_n_u32(a, b, 3);
5869 }
5870
5871 // CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
5872 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5873 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5874 // CHECK: ret <16 x i8> [[TMP0]]
test_vrsraq_n_u8(int8x16_t a,int8x16_t b)5875 int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
5876 return vrsraq_n_u8(a, b, 3);
5877 }
5878
5879 // CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
5880 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5881 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5882 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5883 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5884 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5885 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5886 // CHECK: ret <8 x i16> [[TMP3]]
test_vrsraq_n_u16(int16x8_t a,int16x8_t b)5887 int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
5888 return vrsraq_n_u16(a, b, 3);
5889 }
5890
5891 // CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
5892 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5893 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5894 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5895 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5896 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5897 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5898 // CHECK: ret <4 x i32> [[TMP3]]
test_vrsraq_n_u32(int32x4_t a,int32x4_t b)5899 int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
5900 return vrsraq_n_u32(a, b, 3);
5901 }
5902
5903 // CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
5904 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5905 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5906 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5907 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5908 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5909 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5910 // CHECK: ret <2 x i64> [[TMP3]]
test_vrsraq_n_u64(int64x2_t a,int64x2_t b)5911 int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
5912 return vrsraq_n_u64(a, b, 3);
5913 }
5914
// Signed vsri[q]_n (SRI, shift right and insert): always lowers to the
// llvm.aarch64.neon.vsri target intrinsic with the shift amount as an i32
// immediate operand.
// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}
5983
5984 // CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
5985 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5986 // CHECK: ret <8 x i8> [[VSRI_N]]
test_vsri_n_u8(int8x8_t a,int8x8_t b)5987 int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
5988 return vsri_n_u8(a, b, 3);
5989 }
5990
5991 // CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
5992 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5993 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5994 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5995 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5996 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5997 // CHECK: ret <4 x i16> [[VSRI_N2]]
test_vsri_n_u16(int16x4_t a,int16x4_t b)5998 int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
5999 return vsri_n_u16(a, b, 3);
6000 }
6001
6002 // CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
6003 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6004 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6005 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6006 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
6007 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
6008 // CHECK: ret <2 x i32> [[VSRI_N2]]
test_vsri_n_u32(int32x2_t a,int32x2_t b)6009 int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
6010 return vsri_n_u32(a, b, 3);
6011 }
6012
6013 // CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
6014 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
6015 // CHECK: ret <16 x i8> [[VSRI_N]]
test_vsriq_n_u8(int8x16_t a,int8x16_t b)6016 int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
6017 return vsriq_n_u8(a, b, 3);
6018 }
6019
6020 // CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
6021 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6022 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6023 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6024 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
6025 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
6026 // CHECK: ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_u16(int16x8_t a,int16x8_t b)6027 int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
6028 return vsriq_n_u16(a, b, 3);
6029 }
6030
6031 // CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
6032 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6033 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6034 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6035 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
6036 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
6037 // CHECK: ret <4 x i32> [[VSRI_N2]]
test_vsriq_n_u32(int32x4_t a,int32x4_t b)6038 int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
6039 return vsriq_n_u32(a, b, 3);
6040 }
6041
6042 // CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
6043 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6044 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6045 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6046 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
6047 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
6048 // CHECK: ret <2 x i64> [[VSRI_N2]]
test_vsriq_n_u64(int64x2_t a,int64x2_t b)6049 int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
6050 return vsriq_n_u64(a, b, 3);
6051 }
6052
// SRI on polynomial types: same @llvm.aarch64.neon.vsri intrinsic as the
// integer forms.  The poly16 variants use the maximum legal shift (15)
// for a 16-bit lane.

// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
// CHECK: ret <4 x i16> [[VSRI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
// CHECK: ret <8 x i16> [[VSRI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
6088
// SLI (shift left and insert): each lane of %b is shifted left by the
// immediate and inserted into the corresponding lane of %a.  Covers
// signed, unsigned, and polynomial element types in 64- and 128-bit
// vectors; all lower to @llvm.aarch64.neon.vsli.

// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}

// Unsigned variants: same IR, C-level types are the unsigned vectors.
// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}

// Polynomial variants; the poly16 forms use the maximum shift (15).
// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
6262
// SQSHLU (signed saturating shift left unsigned): the immediate shift
// amount is splatted into a constant vector operand of the intrinsic.

// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
int8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
int16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
int32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
int8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
int16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
int32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
int64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
6321
// SHRN (shift right narrow): lowered to a plain IR shift (ashr for
// signed, lshr for unsigned) followed by a trunc to the half-width
// element type.  The _high variants additionally concatenate the result
// onto %a with a shufflevector to fill a full 128-bit vector.

// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
6447
// SQSHRUN (signed saturating shift right unsigned narrow): lowered to
// the target intrinsic; the _high variants append the narrowed result
// onto %a with a shufflevector.

// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
int8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
int16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
int32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
6504
6505 // CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
6506 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6507 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6508 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6509 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_s16(int16x8_t a)6510 int8x8_t test_vrshrn_n_s16(int16x8_t a) {
6511 return vrshrn_n_s16(a, 3);
6512 }
6513
6514 // CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
6515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6516 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6517 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
6518 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_s32(int32x4_t a)6519 int16x4_t test_vrshrn_n_s32(int32x4_t a) {
6520 return vrshrn_n_s32(a, 9);
6521 }
6522
6523 // CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
6524 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6525 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6526 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
6527 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_s64(int64x2_t a)6528 int32x2_t test_vrshrn_n_s64(int64x2_t a) {
6529 return vrshrn_n_s64(a, 19);
6530 }
6531
6532 // CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
6533 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6534 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6535 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6536 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
// Verifies vrshrn_n_u16 lowers to @llvm.aarch64.neon.rshrn.v8i8 with shift 3
// (same intrinsic as the signed variant; rounding-narrow is sign-agnostic).
test_vrshrn_n_u16(uint16x8_t a)6537 uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
6538   return vrshrn_n_u16(a, 3);
6539 }
6540
6541 // CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
6542 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6543 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6544 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
6545 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
// Verifies vrshrn_n_u32 lowers to @llvm.aarch64.neon.rshrn.v4i16 with shift 9.
test_vrshrn_n_u32(uint32x4_t a)6546 uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
6547   return vrshrn_n_u32(a, 9);
6548 }
6549
6550 // CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
6551 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6552 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6553 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
6554 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
// Verifies vrshrn_n_u64 lowers to @llvm.aarch64.neon.rshrn.v2i32 with shift 19.
test_vrshrn_n_u64(uint64x2_t a)6555 uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
6556   return vrshrn_n_u64(a, 19);
6557 }
6558
6559 // CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
6560 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6561 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6562 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6563 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6564 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vrshrn_high_n_s16 lowers to rshrn.v8i8 (shift 3) plus a shufflevector
// concatenating %a with the narrowed half.
test_vrshrn_high_n_s16(int8x8_t a,int16x8_t b)6565 int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
6566   return vrshrn_high_n_s16(a, b, 3);
6567 }
6568
6569 // CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
6570 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6571 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6572 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
6573 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6574 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vrshrn_high_n_s32 lowers to rshrn.v4i16 (shift 9) plus a concatenating shuffle.
test_vrshrn_high_n_s32(int16x4_t a,int32x4_t b)6575 int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
6576   return vrshrn_high_n_s32(a, b, 9);
6577 }
6578
6579 // CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
6580 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6581 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6582 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
6583 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6584 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vrshrn_high_n_s64 lowers to rshrn.v2i32 (shift 19) plus a concatenating shuffle.
test_vrshrn_high_n_s64(int32x2_t a,int64x2_t b)6585 int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
6586   return vrshrn_high_n_s64(a, b, 19);
6587 }
6588
6589 // CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
6590 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6591 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6592 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6593 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6594 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vrshrn_high_n_u16 lowers to rshrn.v8i8 (shift 3) plus a concatenating shuffle.
test_vrshrn_high_n_u16(uint8x8_t a,uint16x8_t b)6595 uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
6596   return vrshrn_high_n_u16(a, b, 3);
6597 }
6598
6599 // CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
6600 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6601 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6602 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
6603 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6604 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vrshrn_high_n_u32 lowers to rshrn.v4i16 (shift 9) plus a concatenating shuffle.
test_vrshrn_high_n_u32(uint16x4_t a,uint32x4_t b)6605 uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
6606   return vrshrn_high_n_u32(a, b, 9);
6607 }
6608
6609 // CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
6610 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6611 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6612 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
6613 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6614 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vrshrn_high_n_u64 lowers to rshrn.v2i32 (shift 19) plus a concatenating shuffle.
test_vrshrn_high_n_u64(uint32x2_t a,uint64x2_t b)6615 uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
6616   return vrshrn_high_n_u64(a, b, 19);
6617 }
6618
6619 // CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
6620 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6621 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6622 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
6623 // CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
// Verifies vqrshrun_n_s16 lowers to @llvm.aarch64.neon.sqrshrun.v8i8 with shift 3.
test_vqrshrun_n_s16(int16x8_t a)6624 int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
6625   return vqrshrun_n_s16(a, 3);
6626 }
6627
6628 // CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
6629 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6630 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6631 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
6632 // CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
// Verifies vqrshrun_n_s32 lowers to @llvm.aarch64.neon.sqrshrun.v4i16 with shift 9.
test_vqrshrun_n_s32(int32x4_t a)6633 int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
6634   return vqrshrun_n_s32(a, 9);
6635 }
6636
6637 // CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
6638 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6639 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6640 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
6641 // CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
// Verifies vqrshrun_n_s64 lowers to @llvm.aarch64.neon.sqrshrun.v2i32 with shift 19.
test_vqrshrun_n_s64(int64x2_t a)6642 int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
6643   return vqrshrun_n_s64(a, 19);
6644 }
6645
6646 // CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
6647 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6648 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6649 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
6650 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6651 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vqrshrun_high_n_s16 lowers to sqrshrun.v8i8 (shift 3) plus a concatenating shuffle.
test_vqrshrun_high_n_s16(int8x8_t a,int16x8_t b)6652 int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
6653   return vqrshrun_high_n_s16(a, b, 3);
6654 }
6655
6656 // CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
6657 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6658 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6659 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
6660 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6661 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vqrshrun_high_n_s32 lowers to sqrshrun.v4i16 (shift 9) plus a concatenating shuffle.
test_vqrshrun_high_n_s32(int16x4_t a,int32x4_t b)6662 int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
6663   return vqrshrun_high_n_s32(a, b, 9);
6664 }
6665
6666 // CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
6667 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6668 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6669 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
6670 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6671 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vqrshrun_high_n_s64 lowers to sqrshrun.v2i32 (shift 19) plus a concatenating shuffle.
test_vqrshrun_high_n_s64(int32x2_t a,int64x2_t b)6672 int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
6673   return vqrshrun_high_n_s64(a, b, 19);
6674 }
6675
6676 // CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
6677 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6678 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6679 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6680 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
// Verifies vqshrn_n_s16 lowers to @llvm.aarch64.neon.sqshrn.v8i8 with shift 3.
test_vqshrn_n_s16(int16x8_t a)6681 int8x8_t test_vqshrn_n_s16(int16x8_t a) {
6682   return vqshrn_n_s16(a, 3);
6683 }
6684
6685 // CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
6686 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6687 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6688 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6689 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
// Verifies vqshrn_n_s32 lowers to @llvm.aarch64.neon.sqshrn.v4i16 with shift 9.
test_vqshrn_n_s32(int32x4_t a)6690 int16x4_t test_vqshrn_n_s32(int32x4_t a) {
6691   return vqshrn_n_s32(a, 9);
6692 }
6693
6694 // CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
6695 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6696 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6697 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6698 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
// Verifies vqshrn_n_s64 lowers to @llvm.aarch64.neon.sqshrn.v2i32 with shift 19.
test_vqshrn_n_s64(int64x2_t a)6699 int32x2_t test_vqshrn_n_s64(int64x2_t a) {
6700   return vqshrn_n_s64(a, 19);
6701 }
6702
6703 // CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
6704 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6705 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6706 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6707 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
// Verifies vqshrn_n_u16 lowers to the unsigned @llvm.aarch64.neon.uqshrn.v8i8 with shift 3.
test_vqshrn_n_u16(uint16x8_t a)6708 uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
6709   return vqshrn_n_u16(a, 3);
6710 }
6711
6712 // CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
6713 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6714 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6715 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6716 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
// Verifies vqshrn_n_u32 lowers to @llvm.aarch64.neon.uqshrn.v4i16 with shift 9.
test_vqshrn_n_u32(uint32x4_t a)6717 uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
6718   return vqshrn_n_u32(a, 9);
6719 }
6720
6721 // CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
6722 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6723 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6724 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6725 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
// Verifies vqshrn_n_u64 lowers to @llvm.aarch64.neon.uqshrn.v2i32 with shift 19.
test_vqshrn_n_u64(uint64x2_t a)6726 uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
6727   return vqshrn_n_u64(a, 19);
6728 }
6729
6730 // CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
6731 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6732 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6733 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6734 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6735 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vqshrn_high_n_s16 lowers to sqshrn.v8i8 (shift 3) plus a concatenating shuffle.
test_vqshrn_high_n_s16(int8x8_t a,int16x8_t b)6736 int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
6737   return vqshrn_high_n_s16(a, b, 3);
6738 }
6739
6740 // CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
6741 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6742 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6743 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6744 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6745 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vqshrn_high_n_s32 lowers to sqshrn.v4i16 (shift 9) plus a concatenating shuffle.
test_vqshrn_high_n_s32(int16x4_t a,int32x4_t b)6746 int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
6747   return vqshrn_high_n_s32(a, b, 9);
6748 }
6749
6750 // CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
6751 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6752 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6753 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6754 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6755 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vqshrn_high_n_s64 lowers to sqshrn.v2i32 (shift 19) plus a concatenating shuffle.
test_vqshrn_high_n_s64(int32x2_t a,int64x2_t b)6756 int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
6757   return vqshrn_high_n_s64(a, b, 19);
6758 }
6759
6760 // CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
6761 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6762 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6763 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6764 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6765 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vqshrn_high_n_u16 lowers to uqshrn.v8i8 (shift 3) plus a concatenating shuffle.
test_vqshrn_high_n_u16(uint8x8_t a,uint16x8_t b)6766 uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
6767   return vqshrn_high_n_u16(a, b, 3);
6768 }
6769
6770 // CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
6771 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6772 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6773 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6774 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6775 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vqshrn_high_n_u32 lowers to uqshrn.v4i16 (shift 9) plus a concatenating shuffle.
test_vqshrn_high_n_u32(uint16x4_t a,uint32x4_t b)6776 uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
6777   return vqshrn_high_n_u32(a, b, 9);
6778 }
6779
6780 // CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
6781 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6782 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6783 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6784 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6785 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vqshrn_high_n_u64 lowers to uqshrn.v2i32 (shift 19) plus a concatenating shuffle.
test_vqshrn_high_n_u64(uint32x2_t a,uint64x2_t b)6786 uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
6787   return vqshrn_high_n_u64(a, b, 19);
6788 }
6789
6790 // CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
6791 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6792 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6793 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6794 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
// Verifies vqrshrn_n_s16 lowers to @llvm.aarch64.neon.sqrshrn.v8i8 with shift 3.
test_vqrshrn_n_s16(int16x8_t a)6795 int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
6796   return vqrshrn_n_s16(a, 3);
6797 }
6798
6799 // CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
6800 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6801 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6802 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6803 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
// Verifies vqrshrn_n_s32 lowers to @llvm.aarch64.neon.sqrshrn.v4i16 with shift 9.
test_vqrshrn_n_s32(int32x4_t a)6804 int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
6805   return vqrshrn_n_s32(a, 9);
6806 }
6807
6808 // CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
6809 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6810 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6811 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6812 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
// Verifies vqrshrn_n_s64 lowers to @llvm.aarch64.neon.sqrshrn.v2i32 with shift 19.
test_vqrshrn_n_s64(int64x2_t a)6813 int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
6814   return vqrshrn_n_s64(a, 19);
6815 }
6816
6817 // CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
6818 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6819 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6820 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6821 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
// Verifies vqrshrn_n_u16 lowers to the unsigned @llvm.aarch64.neon.uqrshrn.v8i8 with shift 3.
test_vqrshrn_n_u16(uint16x8_t a)6822 uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
6823   return vqrshrn_n_u16(a, 3);
6824 }
6825
6826 // CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
6827 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6828 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6829 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6830 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
// Verifies vqrshrn_n_u32 lowers to @llvm.aarch64.neon.uqrshrn.v4i16 with shift 9.
test_vqrshrn_n_u32(uint32x4_t a)6831 uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
6832   return vqrshrn_n_u32(a, 9);
6833 }
6834
6835 // CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
6836 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6837 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6838 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6839 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
// Verifies vqrshrn_n_u64 lowers to @llvm.aarch64.neon.uqrshrn.v2i32 with shift 19.
test_vqrshrn_n_u64(uint64x2_t a)6840 uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
6841   return vqrshrn_n_u64(a, 19);
6842 }
6843
6844 // CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
6845 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6846 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6847 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6848 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6849 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vqrshrn_high_n_s16 lowers to sqrshrn.v8i8 (shift 3) plus a concatenating shuffle.
test_vqrshrn_high_n_s16(int8x8_t a,int16x8_t b)6850 int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
6851   return vqrshrn_high_n_s16(a, b, 3);
6852 }
6853
6854 // CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
6855 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6856 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6857 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6858 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6859 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vqrshrn_high_n_s32 lowers to sqrshrn.v4i16 (shift 9) plus a concatenating shuffle.
test_vqrshrn_high_n_s32(int16x4_t a,int32x4_t b)6860 int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
6861   return vqrshrn_high_n_s32(a, b, 9);
6862 }
6863
6864 // CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
6865 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6866 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6867 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6868 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6869 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vqrshrn_high_n_s64 lowers to sqrshrn.v2i32 (shift 19) plus a concatenating shuffle.
test_vqrshrn_high_n_s64(int32x2_t a,int64x2_t b)6870 int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
6871   return vqrshrn_high_n_s64(a, b, 19);
6872 }
6873
6874 // CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
6875 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6876 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6877 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6878 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6879 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Verifies vqrshrn_high_n_u16 lowers to uqrshrn.v8i8 (shift 3) plus a concatenating shuffle.
test_vqrshrn_high_n_u16(uint8x8_t a,uint16x8_t b)6880 uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
6881   return vqrshrn_high_n_u16(a, b, 3);
6882 }
6883
6884 // CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
6885 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6886 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6887 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6888 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6889 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Verifies vqrshrn_high_n_u32 lowers to uqrshrn.v4i16 (shift 9) plus a concatenating shuffle.
test_vqrshrn_high_n_u32(uint16x4_t a,uint32x4_t b)6890 uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
6891   return vqrshrn_high_n_u32(a, b, 9);
6892 }
6893
6894 // CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
6895 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6896 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6897 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6898 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6899 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Verifies vqrshrn_high_n_u64 lowers to uqrshrn.v2i32 (shift 19) plus a concatenating shuffle.
test_vqrshrn_high_n_u64(uint32x2_t a,uint64x2_t b)6900 uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
6901   return vqrshrn_high_n_u64(a, b, 19);
6902 }
6903
6904 // CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
6905 // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
6906 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6907 // CHECK: ret <8 x i16> [[VSHLL_N]]
// Verifies vshll_n_s8 lowers to plain IR (no intrinsic): sext to <8 x i16> then shl by 3.
test_vshll_n_s8(int8x8_t a)6908 int16x8_t test_vshll_n_s8(int8x8_t a) {
6909   return vshll_n_s8(a, 3);
6910 }
6911
6912 // CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
6913 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6914 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6915 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6916 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6917 // CHECK: ret <4 x i32> [[VSHLL_N]]
// Verifies vshll_n_s16 lowers to sext to <4 x i32> then shl by 9.
test_vshll_n_s16(int16x4_t a)6918 int32x4_t test_vshll_n_s16(int16x4_t a) {
6919   return vshll_n_s16(a, 9);
6920 }
6921
6922 // CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
6923 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6924 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6925 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6926 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6927 // CHECK: ret <2 x i64> [[VSHLL_N]]
// Verifies vshll_n_s32 lowers to sext to <2 x i64> then shl by 19.
test_vshll_n_s32(int32x2_t a)6928 int64x2_t test_vshll_n_s32(int32x2_t a) {
6929   return vshll_n_s32(a, 19);
6930 }
6931
6932 // CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
6933 // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
6934 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6935 // CHECK: ret <8 x i16> [[VSHLL_N]]
// Verifies vshll_n_u8 lowers to zext to <8 x i16> then shl by 3.
test_vshll_n_u8(uint8x8_t a)6936 uint16x8_t test_vshll_n_u8(uint8x8_t a) {
6937   return vshll_n_u8(a, 3);
6938 }
6939
6940 // CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
6941 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6942 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6943 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6944 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6945 // CHECK: ret <4 x i32> [[VSHLL_N]]
// Verifies vshll_n_u16 lowers to zext to <4 x i32> then shl by 9.
test_vshll_n_u16(uint16x4_t a)6946 uint32x4_t test_vshll_n_u16(uint16x4_t a) {
6947   return vshll_n_u16(a, 9);
6948 }
6949
6950 // CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
6951 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6952 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6953 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6954 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6955 // CHECK: ret <2 x i64> [[VSHLL_N]]
// Verifies vshll_n_u32 lowers to zext to <2 x i64> then shl by 19.
test_vshll_n_u32(uint32x2_t a)6956 uint64x2_t test_vshll_n_u32(uint32x2_t a) {
6957   return vshll_n_u32(a, 19);
6958 }
6959
6960 // CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
6961 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6962 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6963 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6964 // CHECK: ret <8 x i16> [[VSHLL_N]]
// Verifies vshll_high_n_s8: shuffle extracts the high 8 lanes, then sext + shl by 3.
test_vshll_high_n_s8(int8x16_t a)6965 int16x8_t test_vshll_high_n_s8(int8x16_t a) {
6966   return vshll_high_n_s8(a, 3);
6967 }
6968
6969 // CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
6970 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6971 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6972 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6973 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6974 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6975 // CHECK: ret <4 x i32> [[VSHLL_N]]
// Verifies vshll_high_n_s16: shuffle extracts the high 4 lanes, then sext + shl by 9.
test_vshll_high_n_s16(int16x8_t a)6976 int32x4_t test_vshll_high_n_s16(int16x8_t a) {
6977   return vshll_high_n_s16(a, 9);
6978 }
6979
6980 // CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
6981 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6982 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6983 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6984 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6985 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6986 // CHECK: ret <2 x i64> [[VSHLL_N]]
// Verifies vshll_high_n_s32: shuffle extracts the high 2 lanes, then sext + shl by 19.
test_vshll_high_n_s32(int32x4_t a)6987 int64x2_t test_vshll_high_n_s32(int32x4_t a) {
6988   return vshll_high_n_s32(a, 19);
6989 }
6990
6991 // CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
6992 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6993 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6994 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6995 // CHECK: ret <8 x i16> [[VSHLL_N]]
// Verifies vshll_high_n_u8: shuffle extracts the high 8 lanes, then zext + shl by 3.
test_vshll_high_n_u8(uint8x16_t a)6996 uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
6997   return vshll_high_n_u8(a, 3);
6998 }
6999
7000 // CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
7001 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7002 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
7003 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7004 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7005 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
7006 // CHECK: ret <4 x i32> [[VSHLL_N]]
// Verifies vshll_high_n_u16: shuffle extracts the high 4 lanes, then zext + shl by 9.
test_vshll_high_n_u16(uint16x8_t a)7007 uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
7008   return vshll_high_n_u16(a, 9);
7009 }
7010
7011 // CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
7012 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7013 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
7014 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7015 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7016 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
7017 // CHECK: ret <2 x i64> [[VSHLL_N]]
// Verifies vshll_high_n_u32: shuffle extracts the high 2 lanes, then zext + shl by 19.
test_vshll_high_n_u32(uint32x4_t a)7018 uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
7019   return vshll_high_n_u32(a, 19);
7020 }
7021
7022 // CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
7023 // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7024 // CHECK: ret <8 x i16> [[VMOVL_I]]
// Verifies vmovl_s8 lowers to a plain sext from <8 x i8> to <8 x i16>.
test_vmovl_s8(int8x8_t a)7025 int16x8_t test_vmovl_s8(int8x8_t a) {
7026   return vmovl_s8(a);
7027 }
7028
7029 // CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
7030 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7031 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7032 // CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7033 // CHECK: ret <4 x i32> [[VMOVL_I]]
// Verifies vmovl_s16 lowers to a sext from <4 x i16> to <4 x i32>.
test_vmovl_s16(int16x4_t a)7034 int32x4_t test_vmovl_s16(int16x4_t a) {
7035   return vmovl_s16(a);
7036 }
7037
7038 // CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
7039 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7040 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7041 // CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7042 // CHECK: ret <2 x i64> [[VMOVL_I]]
// Verifies vmovl_s32 lowers to a sext from <2 x i32> to <2 x i64>.
test_vmovl_s32(int32x2_t a)7043 int64x2_t test_vmovl_s32(int32x2_t a) {
7044   return vmovl_s32(a);
7045 }
7046
7047 // CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
7048 // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7049 // CHECK: ret <8 x i16> [[VMOVL_I]]
test_vmovl_u8(uint8x8_t a)7050 uint16x8_t test_vmovl_u8(uint8x8_t a) {
7051 return vmovl_u8(a);
7052 }
7053
7054 // CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
7055 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7056 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7057 // CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7058 // CHECK: ret <4 x i32> [[VMOVL_I]]
test_vmovl_u16(uint16x4_t a)7059 uint32x4_t test_vmovl_u16(uint16x4_t a) {
7060 return vmovl_u16(a);
7061 }
7062
7063 // CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
7064 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7065 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7066 // CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7067 // CHECK: ret <2 x i64> [[VMOVL_I]]
test_vmovl_u32(uint32x2_t a)7068 uint64x2_t test_vmovl_u32(uint32x2_t a) {
7069 return vmovl_u32(a);
7070 }
7071
7072 // CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 {
7073 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7074 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
7075 // CHECK: ret <8 x i16> [[TMP0]]
test_vmovl_high_s8(int8x16_t a)7076 int16x8_t test_vmovl_high_s8(int8x16_t a) {
7077 return vmovl_high_s8(a);
7078 }
7079
7080 // CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 {
7081 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7082 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7083 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7084 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7085 // CHECK: ret <4 x i32> [[TMP2]]
test_vmovl_high_s16(int16x8_t a)7086 int32x4_t test_vmovl_high_s16(int16x8_t a) {
7087 return vmovl_high_s16(a);
7088 }
7089
7090 // CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 {
7091 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7092 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7093 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7094 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7095 // CHECK: ret <2 x i64> [[TMP2]]
test_vmovl_high_s32(int32x4_t a)7096 int64x2_t test_vmovl_high_s32(int32x4_t a) {
7097 return vmovl_high_s32(a);
7098 }
7099
7100 // CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 {
7101 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7102 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
7103 // CHECK: ret <8 x i16> [[TMP0]]
test_vmovl_high_u8(uint8x16_t a)7104 uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
7105 return vmovl_high_u8(a);
7106 }
7107
7108 // CHECK-LABEL: define <4 x i32> @test_vmovl_high_u16(<8 x i16> %a) #0 {
7109 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7110 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7111 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7112 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7113 // CHECK: ret <4 x i32> [[TMP2]]
test_vmovl_high_u16(uint16x8_t a)7114 uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
7115 return vmovl_high_u16(a);
7116 }
7117
7118 // CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 {
7119 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7120 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7121 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7122 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7123 // CHECK: ret <2 x i64> [[TMP2]]
test_vmovl_high_u32(uint32x4_t a)7124 uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
7125 return vmovl_high_u32(a);
7126 }
7127
// vcvt_n_*: fixed-point <-> floating-point conversions with an immediate
// fractional-bits count. Per the CHECK lines, each lowers to a call of the
// corresponding aarch64.neon.vcvtfxs2fp / vcvtfxu2fp (int -> fp) or
// vcvtfp2fxs / vcvtfp2fxu (fp -> int) intrinsic; the immediates used here
// (31 for 32-bit lanes, 50 for 64-bit lanes) exercise large shift amounts.
7128 // CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
7129 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7130 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7131 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
7132 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_s32(int32x2_t a)7133 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
7134 return vcvt_n_f32_s32(a, 31);
7135 }
7136
7137 // CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
7138 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7139 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7140 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
7141 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_s32(int32x4_t a)7142 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
7143 return vcvtq_n_f32_s32(a, 31);
7144 }
7145
7146 // CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 {
7147 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7148 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7149 // CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
7150 // CHECK:   ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_s64(int64x2_t a)7151 float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
7152 return vcvtq_n_f64_s64(a, 50);
7153 }
7154
7155 // CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
7156 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7157 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7158 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
7159 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_u32(uint32x2_t a)7160 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
7161 return vcvt_n_f32_u32(a, 31);
7162 }
7163
7164 // CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
7165 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7166 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7167 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
7168 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_u32(uint32x4_t a)7169 float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
7170 return vcvtq_n_f32_u32(a, 31);
7171 }
7172
7173 // CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) #0 {
7174 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7175 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7176 // CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
7177 // CHECK:   ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_u64(uint64x2_t a)7178 float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
7179 return vcvtq_n_f64_u64(a, 50);
7180 }
7181
7182 // CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
7183 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
7184 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7185 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
7186 // CHECK:   ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_s32_f32(float32x2_t a)7187 int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
7188 return vcvt_n_s32_f32(a, 31);
7189 }
7190
7191 // CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
7192 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
7193 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
7194 // CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
7195 // CHECK:   ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_s32_f32(float32x4_t a)7196 int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
7197 return vcvtq_n_s32_f32(a, 31);
7198 }
7199
7200 // CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 {
7201 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
7202 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
7203 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
7204 // CHECK:   ret <2 x i64> [[VCVT_N1]]
test_vcvtq_n_s64_f64(float64x2_t a)7205 int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
7206 return vcvtq_n_s64_f64(a, 50);
7207 }
7208
7209 // CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
7210 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
7211 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7212 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
7213 // CHECK:   ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_u32_f32(float32x2_t a)7214 uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
7215 return vcvt_n_u32_f32(a, 31);
7216 }
7217
7218 // CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
7219 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
7220 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
7221 // CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
7222 // CHECK:   ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_u32_f32(float32x4_t a)7223 uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
7224 return vcvtq_n_u32_f32(a, 31);
7225 }
7226
7227 // CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 {
7228 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
7229 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
7230 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
7231 // CHECK:   ret <2 x i64> [[VCVT_N1]]
test_vcvtq_n_u64_f64(float64x2_t a)7232 uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
7233 return vcvtq_n_u64_f64(a, 50);
7234 }
7235
// vaddl / vaddl_high: long add. Per the CHECK lines, both operands are
// lengthened (sext for signed, zext for unsigned) to double width and added
// at the wider type; the *_high variants take the upper half of each 128-bit
// input first via shufflevector.
7236 // CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
7237 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7238 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7239 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7240 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_s8(int8x8_t a,int8x8_t b)7241 int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
7242 return vaddl_s8(a, b);
7243 }
7244
7245 // CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
7246 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7247 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7248 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7249 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7250 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7251 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
7252 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7253 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_s16(int16x4_t a,int16x4_t b)7254 int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
7255 return vaddl_s16(a, b);
7256 }
7257
7258 // CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
7259 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7260 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7261 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7262 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7263 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7264 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
7265 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7266 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_s32(int32x2_t a,int32x2_t b)7267 int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
7268 return vaddl_s32(a, b);
7269 }
7270
7271 // CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
7272 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7273 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7274 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7275 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_u8(uint8x8_t a,uint8x8_t b)7276 uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
7277 return vaddl_u8(a, b);
7278 }
7279
7280 // CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
7281 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7282 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7283 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7284 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7285 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7286 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
7287 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7288 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_u16(uint16x4_t a,uint16x4_t b)7289 uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
7290 return vaddl_u16(a, b);
7291 }
7292
7293 // CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
7294 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7295 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7296 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7297 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7298 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7299 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
7300 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7301 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_u32(uint32x2_t a,uint32x2_t b)7302 uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
7303 return vaddl_u32(a, b);
7304 }
7305
7306 // CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
7307 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7308 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7309 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7310 // CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7311 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
7312 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_high_s8(int8x16_t a,int8x16_t b)7313 int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
7314 return vaddl_high_s8(a, b);
7315 }
7316
7317 // CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
7318 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7319 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7320 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7321 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7322 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7323 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7324 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7325 // CHECK:   [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
7326 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
7327 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_high_s16(int16x8_t a,int16x8_t b)7328 int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
7329 return vaddl_high_s16(a, b);
7330 }
7331
7332 // CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
7333 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7334 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7335 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7336 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7337 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7338 // CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7339 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7340 // CHECK:   [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
7341 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
7342 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_high_s32(int32x4_t a,int32x4_t b)7343 int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
7344 return vaddl_high_s32(a, b);
7345 }
7346
7347 // CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
7348 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7349 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7350 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7351 // CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7352 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
7353 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_high_u8(uint8x16_t a,uint8x16_t b)7354 uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
7355 return vaddl_high_u8(a, b);
7356 }
7357
7358 // CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
7359 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7360 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7361 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7362 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7363 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7364 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7365 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7366 // CHECK:   [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
7367 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
7368 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_high_u16(uint16x8_t a,uint16x8_t b)7369 uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
7370 return vaddl_high_u16(a, b);
7371 }
7372
7373 // CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
7374 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7375 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7376 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7377 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7378 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7379 // CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7380 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7381 // CHECK:   [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
7382 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
7383 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_high_u32(uint32x4_t a,uint32x4_t b)7384 uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
7385 return vaddl_high_u32(a, b);
7386 }
7387
// vaddw / vaddw_high: wide add. Per the CHECK lines, only the second (narrow)
// operand is lengthened before the add; the first operand is already at the
// result width. The *_high variants lengthen the upper half of %b.
7388 // CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
7389 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7390 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
7391 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_s8(int16x8_t a,int8x8_t b)7392 int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
7393 return vaddw_s8(a, b);
7394 }
7395
7396 // CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
7397 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7398 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7399 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7400 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
7401 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_s16(int32x4_t a,int16x4_t b)7402 int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
7403 return vaddw_s16(a, b);
7404 }
7405
7406 // CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
7407 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7408 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7409 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7410 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
7411 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_s32(int64x2_t a,int32x2_t b)7412 int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
7413 return vaddw_s32(a, b);
7414 }
7415
7416 // CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
7417 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7418 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
7419 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_u8(uint16x8_t a,uint8x8_t b)7420 uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
7421 return vaddw_u8(a, b);
7422 }
7423
7424 // CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
7425 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7426 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7427 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7428 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
7429 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_u16(uint32x4_t a,uint16x4_t b)7430 uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
7431 return vaddw_u16(a, b);
7432 }
7433
7434 // CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
7435 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7436 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7437 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7438 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
7439 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_u32(uint64x2_t a,uint32x2_t b)7440 uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
7441 return vaddw_u32(a, b);
7442 }
7443
7444 // CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
7445 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7446 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7447 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
7448 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_high_s8(int16x8_t a,int8x16_t b)7449 int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
7450 return vaddw_high_s8(a, b);
7451 }
7452
7453 // CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
7454 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7455 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7456 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7457 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7458 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
7459 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_high_s16(int32x4_t a,int16x8_t b)7460 int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
7461 return vaddw_high_s16(a, b);
7462 }
7463
7464 // CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
7465 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7466 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7467 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7468 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7469 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
7470 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_high_s32(int64x2_t a,int32x4_t b)7471 int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
7472 return vaddw_high_s32(a, b);
7473 }
7474
7475 // CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
7476 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7477 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7478 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
7479 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_high_u8(uint16x8_t a,uint8x16_t b)7480 uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
7481 return vaddw_high_u8(a, b);
7482 }
7483
7484 // CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
7485 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7486 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7487 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7488 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7489 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
7490 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_high_u16(uint32x4_t a,uint16x8_t b)7491 uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
7492 return vaddw_high_u16(a, b);
7493 }
7494
7495 // CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
7496 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7497 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7498 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7499 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7500 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
7501 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_high_u32(uint64x2_t a,uint32x4_t b)7502 uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
7503 return vaddw_high_u32(a, b);
7504 }
7505
// vsubl / vsubl_high: long subtract — the mirror of vaddl above. Per the CHECK
// lines, both operands are lengthened (sext/zext) to double width, then
// subtracted; the *_high variants take the upper half of each 128-bit input.
7506 // CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
7507 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7508 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7509 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7510 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_s8(int8x8_t a,int8x8_t b)7511 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
7512 return vsubl_s8(a, b);
7513 }
7514
7515 // CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
7516 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7517 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7518 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7519 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7520 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7521 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
7522 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7523 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_s16(int16x4_t a,int16x4_t b)7524 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
7525 return vsubl_s16(a, b);
7526 }
7527
7528 // CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
7529 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7530 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7531 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7532 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7533 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7534 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
7535 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7536 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_s32(int32x2_t a,int32x2_t b)7537 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
7538 return vsubl_s32(a, b);
7539 }
7540
7541 // CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
7542 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7543 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7544 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7545 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_u8(uint8x8_t a,uint8x8_t b)7546 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
7547 return vsubl_u8(a, b);
7548 }
7549
7550 // CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
7551 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7552 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7553 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7554 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7555 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7556 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
7557 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7558 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_u16(uint16x4_t a,uint16x4_t b)7559 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
7560 return vsubl_u16(a, b);
7561 }
7562
7563 // CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
7564 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7565 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7566 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7567 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7568 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7569 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
7570 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7571 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_u32(uint32x2_t a,uint32x2_t b)7572 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
7573 return vsubl_u32(a, b);
7574 }
7575
7576 // CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
7577 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7578 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7579 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7580 // CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7581 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
7582 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_high_s8(int8x16_t a,int8x16_t b)7583 int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
7584 return vsubl_high_s8(a, b);
7585 }
7586
7587 // CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
7588 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7589 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7590 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7591 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7592 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7593 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7594 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7595 // CHECK:   [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
7596 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
7597 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_high_s16(int16x8_t a,int16x8_t b)7598 int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
7599 return vsubl_high_s16(a, b);
7600 }
7601
7602 // CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
7603 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7604 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7605 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7606 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7607 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7608 // CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7609 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7610 // CHECK:   [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
7611 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
7612 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_high_s32(int32x4_t a,int32x4_t b)7613 int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
7614 return vsubl_high_s32(a, b);
7615 }
7616
7617 // CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
7618 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7619 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7620 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7621 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7622 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
7623 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubl_high_u8(uint8x16_t a,uint8x16_t b)7624 uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
7625 return vsubl_high_u8(a, b);
7626 }
7627
7628 // CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
7629 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7630 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7631 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7632 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7633 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7634 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7635 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7636 // CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
7637 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
7638 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubl_high_u16(uint16x8_t a,uint16x8_t b)7639 uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
7640 return vsubl_high_u16(a, b);
7641 }
7642
7643 // CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
7644 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7645 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7646 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7647 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7648 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7649 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7650 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7651 // CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
7652 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
7653 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubl_high_u32(uint32x4_t a,uint32x4_t b)7654 uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
7655 return vsubl_high_u32(a, b);
7656 }
7657
// vsubw_* tests: widening subtract with an already-wide first operand.
// Expected lowering: only %b is widened (sext/zext, with the 16/32-bit
// variants going through the usual <8 x i8> bitcast round-trip), then a
// plain IR `sub` against %a in the wide type.

// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}
7713
// vsubw_high_* tests: like vsubw_* but the narrow operand is the HIGH half
// of a Q register.  Expected lowering: shufflevector extracts the upper
// lanes of %b, widens them (sext/zext), then a plain IR `sub` against %a.

// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}
7775
// vaddhn_* tests: add-and-take-high-half-narrow.  Expected lowering is pure
// IR (no intrinsic call): wide add, logical shift right by half the element
// width (8/16/32), then truncate to the narrow element type.  Signed and
// unsigned variants produce identical IR since only the high bits are kept.

// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}
7853
// vaddhn_high_* tests: same add/lshr/trunc narrowing as vaddhn_*, but the
// narrow result is then concatenated after %r via a shufflevector, filling
// the high half of the returned Q register.

// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}
7937
// vraddhn_* tests: ROUNDING add-high-half-narrow.  Unlike vaddhn_*, the
// rounding cannot be expressed as shift/trunc IR, so the lowering calls the
// @llvm.aarch64.neon.raddhn.* intrinsic.  The narrower (16/32-bit source)
// variants also round-trip the result through an <8 x i8> bitcast pair.

// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}
8011
8012 // CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
8013 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8014 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8015 // CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8016 // CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
8017 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
8018 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8019 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vraddhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)8020 int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
8021 return vraddhn_high_s16(r, a, b);
8022 }
8023
8024 // CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
8025 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8026 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
8027 // CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
8028 // CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
8029 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
8030 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
8031 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
8032 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8033 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vraddhn_high_s32(int16x4_t r,int32x4_t a,int32x4_t b)8034 int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
8035 return vraddhn_high_s32(r, a, b);
8036 }
8037
8038 // CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
8039 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8040 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
8041 // CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
8042 // CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
8043 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
8044 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
8045 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
8046 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8047 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vraddhn_high_s64(int32x2_t r,int64x2_t a,int64x2_t b)8048 int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
8049 return vraddhn_high_s64(r, a, b);
8050 }
8051
8052 // CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
8053 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8054 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8055 // CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8056 // CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
8057 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
8058 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8059 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vraddhn_high_u16(uint8x8_t r,uint16x8_t a,uint16x8_t b)8060 uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
8061 return vraddhn_high_u16(r, a, b);
8062 }
8063
// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
// 32->16-bit rounding add-high-narrow into the high half: raddhn.v4i16
// followed by a shuffle concatenating r with the narrowed result.
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}
8077
// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
// 64->32-bit rounding add-high-narrow into the high half: raddhn.v2i32
// followed by a shuffle concatenating r with the narrowed result.
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}
8091
// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
// Non-rounding subtract-high-narrow: no target intrinsic — lowered to
// plain IR as sub, lshr by 8 (half the element width), then trunc.
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}
8104
// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
// 32->16-bit subtract-high-narrow: sub, lshr by 16, trunc.
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}
8117
// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
// 64->32-bit subtract-high-narrow: sub, lshr by 32, trunc.
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}
8130
// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
// Unsigned variant — emits the same IR as the signed form (the high
// half of the difference is sign-agnostic): sub, lshr by 8, trunc.
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}
8143
// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
// Unsigned 32->16-bit subtract-high-narrow: sub, lshr by 16, trunc.
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}
8156
// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
// Unsigned 64->32-bit subtract-high-narrow: sub, lshr by 32, trunc.
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
8169
// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
// High-half form: subtract-high-narrow (sub/lshr/trunc) followed by a
// shuffle that concatenates r with the narrowed difference.
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}
8183
// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
// 32->16-bit high-half subtract-high-narrow: sub/lshr/trunc + shuffle.
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}
8197
// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
// 64->32-bit high-half subtract-high-narrow: sub/lshr/trunc + shuffle.
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}
8211
// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
// Unsigned high-half subtract-high-narrow: same lowering as signed.
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}
8225
// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
// Unsigned 32->16-bit high-half subtract-high-narrow.
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}
8239
// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
// Unsigned 64->32-bit high-half subtract-high-narrow.
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}
8253
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
// Rounding subtract-high-narrow: unlike vsubhn, this requires the
// llvm.aarch64.neon.rsubhn target intrinsic (no plain-IR equivalent).
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}
8264
// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
// 32->16-bit rounding subtract-high-narrow via rsubhn.v4i16.
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}
8277
// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
// 64->32-bit rounding subtract-high-narrow via rsubhn.v2i32.
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}
8290
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
// Unsigned rounding subtract-high-narrow — shares the rsubhn intrinsic
// with the signed form (the high half is sign-agnostic).
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}
8301
// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
// Unsigned 32->16-bit rounding subtract-high-narrow via rsubhn.v4i16.
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}
8314
// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
// Unsigned 64->32-bit rounding subtract-high-narrow via rsubhn.v2i32.
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
8327
// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
// High-half rounding subtract-high-narrow: rsubhn intrinsic followed
// by a shuffle concatenating r with the narrowed result.
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}
8339
// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
// 32->16-bit high-half rounding subtract-high-narrow: rsubhn + shuffle.
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}
8353
// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
// 64->32-bit high-half rounding subtract-high-narrow: rsubhn + shuffle.
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}
8367
// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
// Unsigned high-half rounding subtract-high-narrow: rsubhn + shuffle.
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}
8379
// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
// Unsigned 32->16-bit high-half rounding subtract-high-narrow.
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}
8393
// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
// Unsigned 64->32-bit high-half rounding subtract-high-narrow.
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}
8407
// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
// Signed absolute-difference long: sabd on the narrow elements, then
// zext to the widened type (the abs-diff result is non-negative, so
// zero-extension is correct even for the signed variant).
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
// Signed 16->32-bit absolute-difference long: sabd.v4i16 then zext.
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
// Signed 32->64-bit absolute-difference long: sabd.v2i32 then zext.
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
// Unsigned absolute-difference long: uabd then zext to the wide type.
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
// Unsigned 16->32-bit absolute-difference long: uabd.v4i16 then zext.
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
// Unsigned 32->64-bit absolute-difference long: uabd.v2i32 then zext.
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
8474
// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
// Signed absolute-difference accumulate long: sabd(b, c), zext,
// then add into the wide accumulator a.
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
// Signed 16->32-bit abs-diff accumulate long: sabd + zext + add.
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
// Signed 32->64-bit abs-diff accumulate long: sabd + zext + add.
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
// Unsigned abs-diff accumulate long: uabd(b, c) + zext + add into a.
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
// Unsigned 16->32-bit abs-diff accumulate long: uabd + zext + add.
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}
8533 // CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8534 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8535 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8536 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8537 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8538 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8539 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8540 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8541 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8542 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
8543 // CHECK: ret <2 x i64> [[ADD_I]]
// vabal_u32: CHECK lines above expect llvm.aarch64.neon.uabd.v2i32 + zext + add.
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
8547
8548 // CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
8549 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8550 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8551 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8552 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
8553 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
// vabdl_high_s8: CHECK lines above expect high-half shuffles + sabd.v8i8 + zext.
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
8557 // CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
8558 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8559 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8560 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8561 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8562 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8563 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8564 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
8565 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
8566 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8567 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8568 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
// vabdl_high_s16: CHECK lines above expect high-half shuffles + sabd.v4i16 + zext.
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
8572 // CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
8573 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8574 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8575 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8576 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8577 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8578 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8579 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8580 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8581 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8582 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8583 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
// vabdl_high_s32: CHECK lines above expect high-half shuffles + sabd.v2i32 + zext.
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
8587 // CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
8588 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8589 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8590 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8591 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
8592 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
// vabdl_high_u8: CHECK lines above expect high-half shuffles + uabd.v8i8 + zext.
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
8596 // CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
8597 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8598 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8599 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8600 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8601 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8602 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8603 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
8604 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
8605 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8606 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8607 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
// vabdl_high_u16: CHECK lines above expect high-half shuffles + uabd.v4i16 + zext.
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
8611 // CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
8612 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8613 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8614 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8615 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8616 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8617 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8618 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8619 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8620 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8621 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8622 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
// vabdl_high_u32: CHECK lines above expect high-half shuffles + uabd.v2i32 + zext.
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
8626
8627 // CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8628 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8629 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8630 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8631 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
8632 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
8633 // CHECK: ret <8 x i16> [[ADD_I_I]]
// vabal_high_s8: CHECK lines above expect high-half shuffles + sabd.v8i8 + zext + add.
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
8637 // CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8638 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8639 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8640 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8641 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8642 // CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8643 // CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8644 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
8645 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
8646 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8647 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8648 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
8649 // CHECK: ret <4 x i32> [[ADD_I_I]]
// vabal_high_s16: CHECK lines above expect high-half shuffles + sabd.v4i16 + zext + add.
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
8653 // CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8654 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8655 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8656 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8657 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8658 // CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8659 // CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8660 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
8661 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
8662 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8663 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8664 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
8665 // CHECK: ret <2 x i64> [[ADD_I_I]]
// vabal_high_s32: CHECK lines above expect high-half shuffles + sabd.v2i32 + zext + add.
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
8669 // CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8670 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8671 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8672 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8673 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
8674 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
8675 // CHECK: ret <8 x i16> [[ADD_I_I]]
// vabal_high_u8: CHECK lines above expect high-half shuffles + uabd.v8i8 + zext + add.
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
8679 // CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8680 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8681 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8682 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8683 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8684 // CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8685 // CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8686 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
8687 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
8688 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8689 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8690 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
8691 // CHECK: ret <4 x i32> [[ADD_I_I]]
// vabal_high_u16: CHECK lines above expect high-half shuffles + uabd.v4i16 + zext + add.
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
8695 // CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8696 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8697 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8698 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8699 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8700 // CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8701 // CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8702 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
8703 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
8704 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8705 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8706 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
8707 // CHECK: ret <2 x i64> [[ADD_I_I]]
// vabal_high_u32: CHECK lines above expect high-half shuffles + uabd.v2i32 + zext + add.
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}
8711
8712 // CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
8713 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
8714 // CHECK: ret <8 x i16> [[VMULL_I]]
// vmull_s8: CHECK lines above expect a direct llvm.aarch64.neon.smull.v8i16 call.
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
8718 // CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
8719 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8720 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8721 // CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8722 // CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8723 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
8724 // CHECK: ret <4 x i32> [[VMULL2_I]]
// vmull_s16: CHECK lines above expect bitcasts + llvm.aarch64.neon.smull.v4i32.
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
8728 // CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
8729 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8730 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8731 // CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8732 // CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8733 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
8734 // CHECK: ret <2 x i64> [[VMULL2_I]]
// vmull_s32: CHECK lines above expect bitcasts + llvm.aarch64.neon.smull.v2i64.
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
8738 // CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
8739 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
8740 // CHECK: ret <8 x i16> [[VMULL_I]]
// vmull_u8: CHECK lines above expect a direct llvm.aarch64.neon.umull.v8i16 call.
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
8744 // CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
8745 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8746 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8747 // CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8748 // CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8749 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
8750 // CHECK: ret <4 x i32> [[VMULL2_I]]
// vmull_u16: CHECK lines above expect bitcasts + llvm.aarch64.neon.umull.v4i32.
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
8754 // CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
8755 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8756 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8757 // CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8758 // CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8759 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
8760 // CHECK: ret <2 x i64> [[VMULL2_I]]
// vmull_u32: CHECK lines above expect bitcasts + llvm.aarch64.neon.umull.v2i64.
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
8764
8765 // CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
8766 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8767 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8768 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8769 // CHECK: ret <8 x i16> [[VMULL_I_I]]
// vmull_high_s8: CHECK lines above expect high-half shuffles + smull.v8i16.
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
8773 // CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
8774 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8775 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8776 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8777 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8778 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8779 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8780 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8781 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
// vmull_high_s16: CHECK lines above expect high-half shuffles + smull.v4i32.
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
8785 // CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
8786 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8787 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8788 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8789 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8790 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8791 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8792 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8793 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
// vmull_high_s32: CHECK lines above expect high-half shuffles + smull.v2i64.
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
8797 // CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
8798 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8799 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8800 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8801 // CHECK: ret <8 x i16> [[VMULL_I_I]]
// vmull_high_u8: CHECK lines above expect high-half shuffles + umull.v8i16.
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
8805 // CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
8806 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8807 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8808 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8809 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8810 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8811 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8812 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8813 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
// vmull_high_u16: CHECK lines above expect high-half shuffles + umull.v4i32.
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
8817 // CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
8818 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8819 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8820 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8821 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8822 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8823 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8824 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8825 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
// vmull_high_u32: CHECK lines above expect high-half shuffles + umull.v2i64.
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
8829
8830 // CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8831 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
8832 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8833 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlal_s8: CHECK lines above expect smull.v8i16 followed by add into %a.
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
8837 // CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8838 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8839 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8840 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8841 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8842 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8843 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8844 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlal_s16: CHECK lines above expect bitcasts + smull.v4i32 + add into %a.
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
8848 // CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8849 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8850 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8851 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8852 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8853 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8854 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8855 // CHECK: ret <2 x i64> [[ADD_I]]
// vmlal_s32: CHECK lines above expect bitcasts + smull.v2i64 + add into %a.
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
8859 // CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8860 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
8861 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8862 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlal_u8: CHECK lines above expect umull.v8i16 followed by add into %a.
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
8866 // CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8867 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8868 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8869 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8870 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8871 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8872 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8873 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlal_u16: CHECK lines above expect bitcasts + umull.v4i32 + add into %a.
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
8877 // CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8878 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8879 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8880 // CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8881 // CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8882 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8883 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8884 // CHECK: ret <2 x i64> [[ADD_I]]
// vmlal_u32: CHECK lines above expect bitcasts + umull.v2i64 + add into %a.
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
8888
8889 // CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8890 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8891 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8892 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8893 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8894 // CHECK: ret <8 x i16> [[ADD_I_I]]
// vmlal_high_s8: CHECK lines above expect high-half shuffles + smull.v8i16 + add.
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
8898 // CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8899 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8900 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8901 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8902 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8903 // CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8904 // CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8905 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
8906 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8907 // CHECK: ret <4 x i32> [[ADD_I_I]]
// vmlal_high_s16: CHECK lines above expect high-half shuffles + smull.v4i32 + add.
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
8911 // CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8912 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8913 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8914 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8915 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8916 // CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8917 // CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8918 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
8919 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8920 // CHECK: ret <2 x i64> [[ADD_I_I]]
// vmlal_high_s32: CHECK lines above expect high-half shuffles + smull.v2i64 + add.
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
8924 // CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8925 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8926 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8927 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8928 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8929 // CHECK: ret <8 x i16> [[ADD_I_I]]
// vmlal_high_u8: CHECK lines above expect high-half shuffles + umull.v8i16 + add.
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
8933 // CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8934 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8935 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8936 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8937 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8938 // CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8939 // CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8940 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
8941 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8942 // CHECK: ret <4 x i32> [[ADD_I_I]]
// vmlal_high_u16: CHECK lines above expect high-half shuffles + umull.v4i32 + add.
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
// vmlal_high_u32: high 2 lanes (indices 2-3) of b and c are widened with
// @llvm.aarch64.neon.umull.v2i64 and accumulated into a with a plain add.
// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}
8959
// vmlsl_s8: signed widening multiply (smull.v8i16) of b and c, subtracted from a.
// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
// vmlsl_s16: b and c round-trip through <8 x i8> bitcasts, are widened with
// smull.v4i32, and the product is subtracted from the accumulator a.
// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
// vmlsl_s32: same pattern as vmlsl_s16 at 32->64-bit width (smull.v2i64, then sub).
// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
// vmlsl_u8: unsigned counterpart of vmlsl_s8 (umull.v8i16, then sub from a).
// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
// vmlsl_u16: unsigned widening multiply-subtract, 16->32-bit (umull.v4i32, sub).
// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
// vmlsl_u32: unsigned widening multiply-subtract, 32->64-bit (umull.v2i64, sub).
// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
9018
// vmlsl_high_s8: high 8 lanes (indices 8-15) of b and c, smull.v8i16, sub from a.
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
// vmlsl_high_s16: high 4 lanes (indices 4-7) of b and c, smull.v4i32, sub from a.
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
// vmlsl_high_s32: high 2 lanes (indices 2-3) of b and c, smull.v2i64, sub from a.
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
// vmlsl_high_u8: high 8 lanes of b and c, unsigned widen (umull.v8i16), sub from a.
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
// vmlsl_high_u16: high 4 lanes of b and c, umull.v4i32, product subtracted from a.
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
// vmlsl_high_u32: high 2 lanes of b and c, umull.v2i64, product subtracted from a.
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}
9089
// vqdmull_s16: saturating doubling widening multiply via
// @llvm.aarch64.neon.sqdmull.v4i32; result round-trips through a <16 x i8> bitcast.
// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
// vqdmull_s32: saturating doubling widening multiply, 32->64-bit (sqdmull.v2i64).
// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
9114
// vqdmlal_s16: sqdmull.v4i32 of b and c, then saturating accumulate into a
// via @llvm.aarch64.neon.sqadd.v4i32.
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}
9128
// vqdmlal_s32: sqdmull.v2i64 of b and c, saturating-added into a (sqadd.v2i64).
// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}
9142
// vqdmlsl_s16: sqdmull.v4i32 of b and c, saturating-subtracted from a (sqsub.v4i32).
// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}
9156
// vqdmlsl_s32: sqdmull.v2i64 of b and c, saturating-subtracted from a (sqsub.v2i64).
// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
9170
// vqdmull_high_s16: high 4 lanes (indices 4-7) of a and b fed to sqdmull.v4i32.
// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
// vqdmull_high_s32: high 2 lanes (indices 2-3) of a and b fed to sqdmull.v2i64.
// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}
9199
// vqdmlal_high_s16: high 4 lanes of b and c, sqdmull.v4i32, then saturating
// accumulate into a with sqadd.v4i32.
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}
9215
// vqdmlal_high_s32: high 2 lanes of b and c, sqdmull.v2i64, saturating-added into a.
// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}
9231
// vqdmlsl_high_s16: high 4 lanes of b and c, sqdmull.v4i32, then saturating
// subtract from a with sqsub.v4i32.
// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}
9247
// vqdmlsl_high_s32: high 2 lanes of b and c, sqdmull.v2i64, saturating-subtracted from a.
// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}
9263
// vmull_p8: polynomial widening multiply via @llvm.aarch64.neon.pmull.v8i16.
// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}
9270
// vmull_high_p8: polynomial widening multiply of the high 8 lanes (indices 8-15).
// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}
9279
// vaddd_s64: scalar 64-bit add lowers to a plain i64 add (no intrinsic call).
// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}
9286
// vaddd_u64: unsigned scalar 64-bit add; same i64 add lowering as the signed form.
// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}
9293
// vsubd_s64: scalar 64-bit subtract lowers to a plain i64 sub.
// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}
9300
// vsubd_u64: unsigned scalar 64-bit subtract; same i64 sub lowering.
// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}
9307
// vqaddb_s8: scalar saturating add is done by inserting the scalars into lane 0
// of <8 x i8> vectors, calling sqadd.v8i8, and extracting lane 0 back out.
// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}
9317
// vqaddh_s16: scalar saturating add via lane-0 insert/extract around sqadd.v4i16.
// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}
9327
// vqadds_s32: 32-bit scalar saturating add uses the scalar intrinsic sqadd.i32
// directly (no vector insert/extract, unlike the i8/i16 variants).
// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}
9334
// vqaddd_s64: 64-bit scalar saturating add via the scalar intrinsic sqadd.i64.
// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}
9341
// vqaddb_u8: unsigned scalar saturating add via lane-0 insert/extract around uqadd.v8i8.
// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}
9351
// vqaddh_u16: unsigned scalar saturating add via lane-0 insert/extract around uqadd.v4i16.
// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}
9361
// vqadds_u32: unsigned 32-bit scalar saturating add via scalar intrinsic uqadd.i32.
// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}
9368
// vqaddd_u64: unsigned 64-bit scalar saturating add via scalar intrinsic uqadd.i64.
// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}
9375
// vqsubb_s8: scalar saturating subtract via lane-0 insert/extract around sqsub.v8i8.
// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}
9385
// vqsubh_s16: scalar saturating subtract via lane-0 insert/extract around sqsub.v4i16.
// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}
9395
// vqsubs_s32: 32-bit scalar saturating subtract via scalar intrinsic sqsub.i32.
// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}
9402
// vqsubd_s64: 64-bit scalar saturating subtract via scalar intrinsic sqsub.i64.
// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}
9409
// vqsubb_u8: unsigned scalar saturating subtract via lane-0 insert/extract around uqsub.v8i8.
// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}
9419
9420 // CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
9421 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9422 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9423 // CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9424 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
9425 // CHECK: ret i16 [[TMP2]]
test_vqsubh_u16(uint16_t a,uint16_t b)9426 uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
9427 return vqsubh_u16(a, b);
9428 }
9429
9430 // CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
9431 // CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
9432 // CHECK: ret i32 [[VQSUBS_U32_I]]
test_vqsubs_u32(uint32_t a,uint32_t b)9433 uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
9434 return vqsubs_u32(a, b);
9435 }
9436
9437 // CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
9438 // CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
9439 // CHECK: ret i64 [[VQSUBD_U64_I]]
test_vqsubd_u64(uint64_t a,uint64_t b)9440 uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
9441 return vqsubd_u64(a, b);
9442 }
9443
// Scalar 64-bit shift-by-register (vshld_{s,u}64) tests: expect a direct call
// to llvm.aarch64.neon.{s,u}shl.i64 with no vector round-trip.
// CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}
9457
// Scalar saturating shift (vqshl{b,h,s,d}_{s,u}N) tests. As with vqsub above,
// sub-32-bit element types are widened through a one-lane-used vector
// intrinsic; 32/64-bit types call the scalar intrinsic directly.
// CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}
9525
// Scalar 64-bit rounding shift (vrshld_{s,u}64) tests: direct call to
// llvm.aarch64.neon.{s,u}rshl.i64.
// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}


// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}
9540
// Scalar saturating rounding shift (vqrshl{b,h,s,d}_{s,u}N) tests; same
// widen-through-vector pattern for 8/16-bit elements as the vqsub/vqshl
// groups above, scalar intrinsic for 32/64-bit.
// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}
9608
// Pairwise-add-to-scalar (vpaddd/vpadds) tests.
// NOTE(review): vpaddd_s64 is expected to lower via the unsigned reduction
// intrinsic (uaddv) — signed and unsigned 64-bit addition are the same
// operation, so CodeGen canonicalizes to one intrinsic.
// The float variants lower to explicit extract-lanes plus an fadd.
// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK: ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK: ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}
9639
// Pairwise floating-point max/min reductions to a scalar:
// vpmaxnm/vpmax/vpminnm/vpmin over 2-element vectors, lowering to the
// f{max,min}{nm,}v reduction intrinsics.
// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}
9711
// Scalar saturating doubling multiply-high tests (vqdmulh/vqrdmulh);
// 16-bit variants widen through a <4 x i16> vector, 32-bit use the scalar
// sq(r)dmulh.i32 intrinsic.
// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}
9745
// Multiply-extended (vmulx) tests: scalar f32/f64 call fmulx.f32/f64
// directly; the 1-element f64 vector form bitcasts through <8 x i8> before
// calling fmulx.v1f64.
// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
// CHECK: ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}
9770
// Newton-Raphson step intrinsics: reciprocal step (vrecps) and reciprocal
// square-root step (vrsqrts), scalar f32/f64 forms.
// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
// CHECK: ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
// CHECK: ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
// CHECK: ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
// CHECK: ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}
9798
// Scalar integer-to-float conversions (vcvt{s,d}_fN_{s,u}N): these lower to
// plain sitofp/uitofp instructions rather than target intrinsics.
// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}
9826
// Reciprocal estimate (vrecpe) and reciprocal exponent (vrecpx) scalar tests.
// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
// CHECK: ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
// CHECK: ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
// CHECK: ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
// CHECK: ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}
9854
// Reciprocal square-root estimate tests: unsigned integer vector forms
// (ursqrte) and scalar floating-point forms (frsqrte).
// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
// CHECK: ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
// CHECK: ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}
9886
9887 // CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
9888 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
9889 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
9890 // CHECK: ret <16 x i8> [[TMP1]]
test_vld1q_u8(uint8_t const * a)9891 uint8x16_t test_vld1q_u8(uint8_t const *a) {
9892 return vld1q_u8(a);
9893 }
9894
9895 // CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
9896 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9897 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9898 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9899 // CHECK: ret <8 x i16> [[TMP2]]
test_vld1q_u16(uint16_t const * a)9900 uint16x8_t test_vld1q_u16(uint16_t const *a) {
9901 return vld1q_u16(a);
9902 }
9903
9904 // CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
9905 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9906 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
9907 // CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
9908 // CHECK: ret <4 x i32> [[TMP2]]
test_vld1q_u32(uint32_t const * a)9909 uint32x4_t test_vld1q_u32(uint32_t const *a) {
9910 return vld1q_u32(a);
9911 }
9912
9913 // CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
9914 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9915 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
9916 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
9917 // CHECK: ret <2 x i64> [[TMP2]]
test_vld1q_u64(uint64_t const * a)9918 uint64x2_t test_vld1q_u64(uint64_t const *a) {
9919 return vld1q_u64(a);
9920 }
9921
9922 // CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
9923 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
9924 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
9925 // CHECK: ret <16 x i8> [[TMP1]]
test_vld1q_s8(int8_t const * a)9926 int8x16_t test_vld1q_s8(int8_t const *a) {
9927 return vld1q_s8(a);
9928 }
9929
9930 // CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
9931 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9932 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9933 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9934 // CHECK: ret <8 x i16> [[TMP2]]
test_vld1q_s16(int16_t const * a)9935 int16x8_t test_vld1q_s16(int16_t const *a) {
9936 return vld1q_s16(a);
9937 }
9938
9939 // CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
9940 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9941 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
9942 // CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
9943 // CHECK: ret <4 x i32> [[TMP2]]
test_vld1q_s32(int32_t const * a)9944 int32x4_t test_vld1q_s32(int32_t const *a) {
9945 return vld1q_s32(a);
9946 }
9947
9948 // CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
9949 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9950 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
9951 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
9952 // CHECK: ret <2 x i64> [[TMP2]]
test_vld1q_s64(int64_t const * a)9953 int64x2_t test_vld1q_s64(int64_t const *a) {
9954 return vld1q_s64(a);
9955 }
9956
9957 // CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
9958 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
9959 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9960 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9961 // CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
9962 // CHECK: ret <8 x half> [[TMP3]]
test_vld1q_f16(float16_t const * a)9963 float16x8_t test_vld1q_f16(float16_t const *a) {
9964 return vld1q_f16(a);
9965 }
9966
9967 // CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
9968 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
9969 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
9970 // CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
9971 // CHECK: ret <4 x float> [[TMP2]]
test_vld1q_f32(float32_t const * a)9972 float32x4_t test_vld1q_f32(float32_t const *a) {
9973 return vld1q_f32(a);
9974 }
9975
9976 // CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
9977 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
9978 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
9979 // CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
9980 // CHECK: ret <2 x double> [[TMP2]]
test_vld1q_f64(float64_t const * a)9981 float64x2_t test_vld1q_f64(float64_t const *a) {
9982 return vld1q_f64(a);
9983 }
9984
9985 // CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
9986 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
9987 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
9988 // CHECK: ret <16 x i8> [[TMP1]]
test_vld1q_p8(poly8_t const * a)9989 poly8x16_t test_vld1q_p8(poly8_t const *a) {
9990 return vld1q_p8(a);
9991 }
9992
9993 // CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
9994 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9995 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9996 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9997 // CHECK: ret <8 x i16> [[TMP2]]
test_vld1q_p16(poly16_t const * a)9998 poly16x8_t test_vld1q_p16(poly16_t const *a) {
9999 return vld1q_p16(a);
10000 }
10001
10002 // CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
10003 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10004 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
10005 // CHECK: ret <8 x i8> [[TMP1]]
test_vld1_u8(uint8_t const * a)10006 uint8x8_t test_vld1_u8(uint8_t const *a) {
10007 return vld1_u8(a);
10008 }
10009
10010 // CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
10011 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10012 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10013 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
10014 // CHECK: ret <4 x i16> [[TMP2]]
test_vld1_u16(uint16_t const * a)10015 uint16x4_t test_vld1_u16(uint16_t const *a) {
10016 return vld1_u16(a);
10017 }
10018
10019 // CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
10020 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
10021 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10022 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
10023 // CHECK: ret <2 x i32> [[TMP2]]
test_vld1_u32(uint32_t const * a)10024 uint32x2_t test_vld1_u32(uint32_t const *a) {
10025 return vld1_u32(a);
10026 }
10027
10028 // CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
10029 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
10030 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10031 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
10032 // CHECK: ret <1 x i64> [[TMP2]]
test_vld1_u64(uint64_t const * a)10033 uint64x1_t test_vld1_u64(uint64_t const *a) {
10034 return vld1_u64(a);
10035 }
10036
10037 // CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
10038 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10039 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
10040 // CHECK: ret <8 x i8> [[TMP1]]
test_vld1_s8(int8_t const * a)10041 int8x8_t test_vld1_s8(int8_t const *a) {
10042 return vld1_s8(a);
10043 }
10044
10045 // CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
10046 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10047 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10048 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
10049 // CHECK: ret <4 x i16> [[TMP2]]
// Exercises vld1_s16; per the CHECK lines above it lowers to a plain <4 x i16> load.
test_vld1_s16(int16_t const * a)10050 int16x4_t test_vld1_s16(int16_t const *a) {
10051   return vld1_s16(a);
10052 }
10053
10054 // CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
10055 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
10056 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10057 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
10058 // CHECK: ret <2 x i32> [[TMP2]]
// Exercises vld1_s32; per the CHECK lines above it lowers to a plain <2 x i32> load.
test_vld1_s32(int32_t const * a)10059 int32x2_t test_vld1_s32(int32_t const *a) {
10060   return vld1_s32(a);
10061 }
10062
10063 // CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
10064 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
10065 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10066 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
10067 // CHECK: ret <1 x i64> [[TMP2]]
// Exercises vld1_s64; per the CHECK lines above it lowers to a plain <1 x i64> load.
test_vld1_s64(int64_t const * a)10068 int64x1_t test_vld1_s64(int64_t const *a) {
10069   return vld1_s64(a);
10070 }
10071
10072 // CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
10073 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
10074 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10075 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
10076 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
10077 // CHECK: ret <4 x half> [[TMP3]]
// Exercises vld1_f16; per the CHECK lines above the load is done as <4 x i16> and
// then bitcast to <4 x half>.
test_vld1_f16(float16_t const * a)10078 float16x4_t test_vld1_f16(float16_t const *a) {
10079   return vld1_f16(a);
10080 }
10081
10082 // CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
10083 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
10084 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
10085 // CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
10086 // CHECK: ret <2 x float> [[TMP2]]
// Exercises vld1_f32; per the CHECK lines above it lowers to a plain <2 x float> load.
test_vld1_f32(float32_t const * a)10087 float32x2_t test_vld1_f32(float32_t const *a) {
10088   return vld1_f32(a);
10089 }
10090
10091 // CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
10092 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
10093 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
10094 // CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
10095 // CHECK: ret <1 x double> [[TMP2]]
// Exercises vld1_f64; per the CHECK lines above it lowers to a plain <1 x double> load.
test_vld1_f64(float64_t const * a)10096 float64x1_t test_vld1_f64(float64_t const *a) {
10097   return vld1_f64(a);
10098 }
10099
10100 // CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
10101 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10102 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
10103 // CHECK: ret <8 x i8> [[TMP1]]
// Exercises vld1_p8 (polynomial element type); per the CHECK lines above it lowers
// to the same plain <8 x i8> load as the integer variants.
test_vld1_p8(poly8_t const * a)10104 poly8x8_t test_vld1_p8(poly8_t const *a) {
10105   return vld1_p8(a);
10106 }
10107
10108 // CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
10109 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10110 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10111 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
10112 // CHECK: ret <4 x i16> [[TMP2]]
// Exercises vld1_p16; per the CHECK lines above it lowers to a plain <4 x i16> load.
test_vld1_p16(poly16_t const * a)10113 poly16x4_t test_vld1_p16(poly16_t const *a) {
10114   return vld1_p16(a);
10115 }
10116
10117 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
10118 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
10119 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
10120 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
10121 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10122 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10123 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
10124 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
10125 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
10126 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
10127 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
10128 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
10129 // CHECK: ret %struct.uint8x16x2_t [[TMP5]]
// Exercises vld2q_u8; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v16i8 with the result returned via a sret-style struct copy.
test_vld2q_u8(uint8_t const * a)10130 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
10131   return vld2q_u8(a);
10132 }
10133
10134 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
10135 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
10136 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
10137 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
10138 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10139 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10140 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10141 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10142 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10143 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
10144 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
10145 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10146 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
10147 // CHECK: ret %struct.uint16x8x2_t [[TMP6]]
// Exercises vld2q_u16; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v8i16.
test_vld2q_u16(uint16_t const * a)10148 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
10149   return vld2q_u16(a);
10150 }
10151
10152 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
10153 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
10154 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
10155 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
10156 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10157 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10158 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10159 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
10160 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
10161 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
10162 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
10163 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10164 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
10165 // CHECK: ret %struct.uint32x4x2_t [[TMP6]]
// Exercises vld2q_u32; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v4i32.
test_vld2q_u32(uint32_t const * a)10166 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
10167   return vld2q_u32(a);
10168 }
10169
10170 // CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
10171 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
10172 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
10173 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
10174 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10175 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10176 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10177 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
10178 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
10179 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
10180 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
10181 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10182 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
10183 // CHECK: ret %struct.uint64x2x2_t [[TMP6]]
// Exercises vld2q_u64; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v2i64.
test_vld2q_u64(uint64_t const * a)10184 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
10185   return vld2q_u64(a);
10186 }
10187
10188 // CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
10189 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
10190 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
10191 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
10192 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10193 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10194 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
10195 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
10196 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
10197 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
10198 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
10199 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
10200 // CHECK: ret %struct.int8x16x2_t [[TMP5]]
// Exercises vld2q_s8; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v16i8.
test_vld2q_s8(int8_t const * a)10201 int8x16x2_t test_vld2q_s8(int8_t const *a) {
10202   return vld2q_s8(a);
10203 }
10204
10205 // CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
10206 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
10207 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
10208 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
10209 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10210 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10211 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10212 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10213 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10214 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
10215 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
10216 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10217 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
10218 // CHECK: ret %struct.int16x8x2_t [[TMP6]]
// Exercises vld2q_s16; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v8i16.
test_vld2q_s16(int16_t const * a)10219 int16x8x2_t test_vld2q_s16(int16_t const *a) {
10220   return vld2q_s16(a);
10221 }
10222
10223 // CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
10224 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
10225 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
10226 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
10227 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10228 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10229 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10230 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
10231 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
10232 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
10233 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
10234 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10235 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
10236 // CHECK: ret %struct.int32x4x2_t [[TMP6]]
// Exercises vld2q_s32; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v4i32.
test_vld2q_s32(int32_t const * a)10237 int32x4x2_t test_vld2q_s32(int32_t const *a) {
10238   return vld2q_s32(a);
10239 }
10240
10241 // CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
10242 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
10243 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
10244 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
10245 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10246 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10247 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10248 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
10249 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
10250 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
10251 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
10252 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10253 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
10254 // CHECK: ret %struct.int64x2x2_t [[TMP6]]
// Exercises vld2q_s64; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v2i64.
test_vld2q_s64(int64_t const * a)10255 int64x2x2_t test_vld2q_s64(int64_t const *a) {
10256   return vld2q_s64(a);
10257 }
10258
10259 // CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
10260 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
10261 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
10262 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
10263 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10264 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10265 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10266 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10267 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10268 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
10269 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
10270 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10271 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
10272 // CHECK: ret %struct.float16x8x2_t [[TMP6]]
// Exercises vld2q_f16; per the CHECK lines above the half-precision data is loaded
// via @llvm.aarch64.neon.ld2.v8i16 (i16 elements, bitcast handled by the struct copy).
test_vld2q_f16(float16_t const * a)10273 float16x8x2_t test_vld2q_f16(float16_t const *a) {
10274   return vld2q_f16(a);
10275 }
10276
10277 // CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
10278 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
10279 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
10280 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
10281 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10282 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10283 // CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10284 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
10285 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
10286 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
10287 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
10288 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10289 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
10290 // CHECK: ret %struct.float32x4x2_t [[TMP6]]
// Exercises vld2q_f32; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v4f32.
test_vld2q_f32(float32_t const * a)10291 float32x4x2_t test_vld2q_f32(float32_t const *a) {
10292   return vld2q_f32(a);
10293 }
10294
10295 // CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 {
10296 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
10297 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
10298 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
10299 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10300 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10301 // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10302 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
10303 // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
10304 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
10305 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
10306 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10307 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
10308 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
// Exercises vld2q_f64; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v2f64.
test_vld2q_f64(float64_t const * a)10309 float64x2x2_t test_vld2q_f64(float64_t const *a) {
10310   return vld2q_f64(a);
10311 }
10312
10313 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
10314 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
10315 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
10316 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
10317 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10318 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10319 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
10320 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
10321 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
10322 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
10323 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
10324 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
10325 // CHECK: ret %struct.poly8x16x2_t [[TMP5]]
// Exercises vld2q_p8; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v16i8, same as the integer u8/s8 variants.
test_vld2q_p8(poly8_t const * a)10326 poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
10327   return vld2q_p8(a);
10328 }
10329
10330 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
10331 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
10332 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
10333 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
10334 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10335 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10336 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10337 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10338 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10339 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
10340 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
10341 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10342 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
10343 // CHECK: ret %struct.poly16x8x2_t [[TMP6]]
// Exercises vld2q_p16; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v8i16.
test_vld2q_p16(poly16_t const * a)10344 poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
10345   return vld2q_p16(a);
10346 }
10347
10348 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
10349 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
10350 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
10351 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
10352 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10353 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10354 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
10355 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
10356 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
10357 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
10358 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
10359 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
10360 // CHECK: ret %struct.uint8x8x2_t [[TMP5]]
// Exercises the 64-bit (D-register) variant vld2_u8; per the CHECK lines above it
// lowers to @llvm.aarch64.neon.ld2.v8i8.
test_vld2_u8(uint8_t const * a)10361 uint8x8x2_t test_vld2_u8(uint8_t const *a) {
10362   return vld2_u8(a);
10363 }
10364
10365 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
10366 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
10367 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
10368 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
10369 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10370 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10371 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10372 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10373 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10374 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
10375 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
10376 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10377 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
10378 // CHECK: ret %struct.uint16x4x2_t [[TMP6]]
// Exercises vld2_u16; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v4i16.
test_vld2_u16(uint16_t const * a)10379 uint16x4x2_t test_vld2_u16(uint16_t const *a) {
10380   return vld2_u16(a);
10381 }
10382
10383 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
10384 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
10385 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
10386 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
10387 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10388 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10389 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10390 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
10391 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
10392 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
10393 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
10394 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10395 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
10396 // CHECK: ret %struct.uint32x2x2_t [[TMP6]]
// Exercises vld2_u32; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v2i32.
test_vld2_u32(uint32_t const * a)10397 uint32x2x2_t test_vld2_u32(uint32_t const *a) {
10398   return vld2_u32(a);
10399 }
10400
10401 // CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
10402 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
10403 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
10404 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
10405 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10406 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10407 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10408 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
10409 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
10410 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
10411 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
10412 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10413 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
10414 // CHECK: ret %struct.uint64x1x2_t [[TMP6]]
// Exercises vld2_u64; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v1i64.
test_vld2_u64(uint64_t const * a)10415 uint64x1x2_t test_vld2_u64(uint64_t const *a) {
10416   return vld2_u64(a);
10417 }
10418
10419 // CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
10420 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
10421 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
10422 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
10423 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10424 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10425 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
10426 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
10427 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
10428 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
10429 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
10430 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
10431 // CHECK: ret %struct.int8x8x2_t [[TMP5]]
// Exercises vld2_s8; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v8i8.
test_vld2_s8(int8_t const * a)10432 int8x8x2_t test_vld2_s8(int8_t const *a) {
10433   return vld2_s8(a);
10434 }
10435
10436 // CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
10437 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
10438 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
10439 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
10440 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10441 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10442 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10443 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10444 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10445 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
10446 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
10447 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10448 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
10449 // CHECK: ret %struct.int16x4x2_t [[TMP6]]
// Exercises vld2_s16; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v4i16.
test_vld2_s16(int16_t const * a)10450 int16x4x2_t test_vld2_s16(int16_t const *a) {
10451   return vld2_s16(a);
10452 }
10453
10454 // CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
10455 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
10456 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
10457 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
10458 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10459 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10460 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10461 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
10462 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
10463 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
10464 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
10465 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10466 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
10467 // CHECK: ret %struct.int32x2x2_t [[TMP6]]
// Exercises vld2_s32; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v2i32.
test_vld2_s32(int32_t const * a)10468 int32x2x2_t test_vld2_s32(int32_t const *a) {
10469   return vld2_s32(a);
10470 }
10471
10472 // CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
10473 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
10474 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
10475 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
10476 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10477 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10478 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10479 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
10480 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
10481 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
10482 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
10483 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10484 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
10485 // CHECK: ret %struct.int64x1x2_t [[TMP6]]
// Exercises vld2_s64; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v1i64.
test_vld2_s64(int64_t const * a)10486 int64x1x2_t test_vld2_s64(int64_t const *a) {
10487   return vld2_s64(a);
10488 }
10489
10490 // CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
10491 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
10492 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
10493 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
10494 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10495 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10496 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10497 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10498 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10499 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
10500 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
10501 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10502 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
10503 // CHECK: ret %struct.float16x4x2_t [[TMP6]]
// Exercises vld2_f16; per the CHECK lines above the half-precision data is loaded
// via @llvm.aarch64.neon.ld2.v4i16 (i16 elements).
test_vld2_f16(float16_t const * a)10504 float16x4x2_t test_vld2_f16(float16_t const *a) {
10505   return vld2_f16(a);
10506 }
10507
10508 // CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
10509 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
10510 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
10511 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
10512 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10513 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10514 // CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10515 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
10516 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
10517 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
10518 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
10519 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10520 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
10521 // CHECK: ret %struct.float32x2x2_t [[TMP6]]
// Exercises vld2_f32; per the CHECK lines above it lowers to
// @llvm.aarch64.neon.ld2.v2f32.
test_vld2_f32(float32_t const * a)10522 float32x2x2_t test_vld2_f32(float32_t const *a) {
10523   return vld2_f32(a);
10524 }
10525
10526 // CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 {
10527 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
10528 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
10529 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
10530 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10531 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10532 // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10533 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
10534 // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
10535 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
10536 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
10537 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10538 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
10539 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
test_vld2_f64(float64_t const * a)10540 float64x1x2_t test_vld2_f64(float64_t const *a) {
10541 return vld2_f64(a);
10542 }
10543
10544 // CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
10545 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
10546 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
10547 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
10548 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10549 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10550 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
10551 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
10552 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
10553 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
10554 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
10555 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
10556 // CHECK: ret %struct.poly8x8x2_t [[TMP5]]
test_vld2_p8(poly8_t const * a)10557 poly8x8x2_t test_vld2_p8(poly8_t const *a) {
10558 return vld2_p8(a);
10559 }
10560
10561 // CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
10562 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
10563 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
10564 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
10565 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10566 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10567 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10568 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10569 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10570 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
10571 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
10572 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10573 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
10574 // CHECK: ret %struct.poly16x4x2_t [[TMP6]]
test_vld2_p16(poly16_t const * a)10575 poly16x4x2_t test_vld2_p16(poly16_t const *a) {
10576 return vld2_p16(a);
10577 }
10578
10579 // CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 {
10580 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
10581 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
10582 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
10583 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10584 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10585 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
10586 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10587 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
10588 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
10589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
10590 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
10591 // CHECK: ret %struct.uint8x16x3_t [[TMP5]]
test_vld3q_u8(uint8_t const * a)10592 uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
10593 return vld3q_u8(a);
10594 }
10595
10596 // CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 {
10597 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
10598 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
10599 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
10600 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10601 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10602 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10603 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10604 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10605 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
10606 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
10607 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10608 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
10609 // CHECK: ret %struct.uint16x8x3_t [[TMP6]]
test_vld3q_u16(uint16_t const * a)10610 uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
10611 return vld3q_u16(a);
10612 }
10613
10614 // CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 {
10615 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
10616 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
10617 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
10618 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10619 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10620 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10621 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
10622 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10623 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
10624 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
10625 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10626 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
10627 // CHECK: ret %struct.uint32x4x3_t [[TMP6]]
test_vld3q_u32(uint32_t const * a)10628 uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
10629 return vld3q_u32(a);
10630 }
10631
10632 // CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 {
10633 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
10634 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
10635 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
10636 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10637 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10638 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10639 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
10640 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10641 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
10642 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
10643 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10644 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
10645 // CHECK: ret %struct.uint64x2x3_t [[TMP6]]
test_vld3q_u64(uint64_t const * a)10646 uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
10647 return vld3q_u64(a);
10648 }
10649
10650 // CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
10651 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
10652 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
10653 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
10654 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10655 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10656 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
10657 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10658 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
10659 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
10660 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
10661 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
10662 // CHECK: ret %struct.int8x16x3_t [[TMP5]]
test_vld3q_s8(int8_t const * a)10663 int8x16x3_t test_vld3q_s8(int8_t const *a) {
10664 return vld3q_s8(a);
10665 }
10666
10667 // CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
10668 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
10669 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
10670 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
10671 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10672 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10673 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10674 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10675 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10676 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
10677 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
10678 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10679 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
10680 // CHECK: ret %struct.int16x8x3_t [[TMP6]]
test_vld3q_s16(int16_t const * a)10681 int16x8x3_t test_vld3q_s16(int16_t const *a) {
10682 return vld3q_s16(a);
10683 }
10684
10685 // CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
10686 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
10687 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
10688 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
10689 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10690 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10691 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10692 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
10693 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10694 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
10695 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
10696 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10697 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
10698 // CHECK: ret %struct.int32x4x3_t [[TMP6]]
test_vld3q_s32(int32_t const * a)10699 int32x4x3_t test_vld3q_s32(int32_t const *a) {
10700 return vld3q_s32(a);
10701 }
10702
10703 // CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 {
10704 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
10705 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
10706 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
10707 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10708 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10709 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10710 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
10711 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10712 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
10713 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
10714 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10715 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
10716 // CHECK: ret %struct.int64x2x3_t [[TMP6]]
test_vld3q_s64(int64_t const * a)10717 int64x2x3_t test_vld3q_s64(int64_t const *a) {
10718 return vld3q_s64(a);
10719 }
10720
10721 // CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
10722 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
10723 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
10724 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
10725 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10726 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10727 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10728 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10729 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10730 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
10731 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
10732 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10733 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
10734 // CHECK: ret %struct.float16x8x3_t [[TMP6]]
test_vld3q_f16(float16_t const * a)10735 float16x8x3_t test_vld3q_f16(float16_t const *a) {
10736 return vld3q_f16(a);
10737 }
10738
10739 // CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
10740 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
10741 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
10742 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
10743 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10744 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10745 // CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10746 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
10747 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
10748 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
10749 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
10750 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10751 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
10752 // CHECK: ret %struct.float32x4x3_t [[TMP6]]
test_vld3q_f32(float32_t const * a)10753 float32x4x3_t test_vld3q_f32(float32_t const *a) {
10754 return vld3q_f32(a);
10755 }
10756
10757 // CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 {
10758 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
10759 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
10760 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
10761 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10762 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10763 // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10764 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
10765 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
10766 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
10767 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
10768 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10769 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
10770 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
test_vld3q_f64(float64_t const * a)10771 float64x2x3_t test_vld3q_f64(float64_t const *a) {
10772 return vld3q_f64(a);
10773 }
10774
10775 // CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
10776 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
10777 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
10778 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
10779 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10780 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10781 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
10782 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10783 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
10784 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
10785 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
10786 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
10787 // CHECK: ret %struct.poly8x16x3_t [[TMP5]]
test_vld3q_p8(poly8_t const * a)10788 poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
10789 return vld3q_p8(a);
10790 }
10791
10792 // CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
10793 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
10794 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
10795 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
10796 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10797 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10798 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10799 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10800 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10801 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
10802 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
10803 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10804 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
10805 // CHECK: ret %struct.poly16x8x3_t [[TMP6]]
test_vld3q_p16(poly16_t const * a)10806 poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
10807 return vld3q_p16(a);
10808 }
10809
10810 // CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
10811 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
10812 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
10813 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
10814 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10815 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10816 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10817 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10818 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
10819 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
10820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
10821 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
10822 // CHECK: ret %struct.uint8x8x3_t [[TMP5]]
test_vld3_u8(uint8_t const * a)10823 uint8x8x3_t test_vld3_u8(uint8_t const *a) {
10824 return vld3_u8(a);
10825 }
10826
10827 // CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
10828 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
10829 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
10830 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
10831 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10832 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10833 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10834 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10835 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10836 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
10837 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
10838 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10839 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
10840 // CHECK: ret %struct.uint16x4x3_t [[TMP6]]
test_vld3_u16(uint16_t const * a)10841 uint16x4x3_t test_vld3_u16(uint16_t const *a) {
10842 return vld3_u16(a);
10843 }
10844
10845 // CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
10846 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
10847 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
10848 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
10849 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10850 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10851 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10852 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
10853 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10854 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
10855 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
10856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10857 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
10858 // CHECK: ret %struct.uint32x2x3_t [[TMP6]]
test_vld3_u32(uint32_t const * a)10859 uint32x2x3_t test_vld3_u32(uint32_t const *a) {
10860 return vld3_u32(a);
10861 }
10862
10863 // CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
10864 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
10865 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
10866 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
10867 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10868 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10869 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10870 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
10871 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10872 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
10873 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
10874 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10875 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
10876 // CHECK: ret %struct.uint64x1x3_t [[TMP6]]
test_vld3_u64(uint64_t const * a)10877 uint64x1x3_t test_vld3_u64(uint64_t const *a) {
10878 return vld3_u64(a);
10879 }
10880
10881 // CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
10882 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
10883 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
10884 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
10885 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10886 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10887 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10888 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10889 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
10890 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
10891 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
10892 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
10893 // CHECK: ret %struct.int8x8x3_t [[TMP5]]
test_vld3_s8(int8_t const * a)10894 int8x8x3_t test_vld3_s8(int8_t const *a) {
10895 return vld3_s8(a);
10896 }
10897
10898 // CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
10899 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
10900 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
10901 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
10902 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10903 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10904 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10905 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10906 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10907 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
10908 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
10909 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10910 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
10911 // CHECK: ret %struct.int16x4x3_t [[TMP6]]
test_vld3_s16(int16_t const * a)10912 int16x4x3_t test_vld3_s16(int16_t const *a) {
10913 return vld3_s16(a);
10914 }
10915
10916 // CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
10917 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
10918 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
10919 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
10920 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10921 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10922 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10923 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
10924 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10925 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
10926 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
10927 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10928 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
10929 // CHECK: ret %struct.int32x2x3_t [[TMP6]]
test_vld3_s32(int32_t const * a)10930 int32x2x3_t test_vld3_s32(int32_t const *a) {
10931 return vld3_s32(a);
10932 }
10933
10934 // CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
10935 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
10936 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
10937 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
10938 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10939 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10940 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10941 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
10942 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10943 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
10944 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
10945 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10946 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
10947 // CHECK: ret %struct.int64x1x3_t [[TMP6]]
test_vld3_s64(int64_t const * a)10948 int64x1x3_t test_vld3_s64(int64_t const *a) {
10949 return vld3_s64(a);
10950 }
10951
10952 // CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
10953 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
10954 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
10955 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
10956 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10957 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10958 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10959 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10960 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10961 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
10962 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
10963 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10964 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
10965 // CHECK: ret %struct.float16x4x3_t [[TMP6]]
test_vld3_f16(float16_t const * a)10966 float16x4x3_t test_vld3_f16(float16_t const *a) {
10967 return vld3_f16(a);
10968 }
10969
// vld3_f32: de-interleaving 3-element load of <2 x float> via
// @llvm.aarch64.neon.ld3.v2f32; aggregate returned through memcpy (24 bytes).
// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}
10987
// vld3_f64: AArch64-only 3-element load of <1 x double> lanes via
// @llvm.aarch64.neon.ld3.v1f64.
// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}
11005
// vld3_p8: poly8 uses the same <8 x i8> ld3 lowering as u8/s8; note the i8*
// argument needs no first pointer bitcast, so TMP numbering is one lower.
// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}
11022
// vld3_p16: poly16 shares the <4 x i16> ld3 lowering with u16/s16.
// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}
11040
// vld4q_u8: q-register 4-element load of <16 x i8>; the 64-byte aggregate is
// copied with 16-byte alignment.
// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x4_t [[TMP5]]
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}
11057
// vld4q_u16: q-register 4-element load of <8 x i16> via
// @llvm.aarch64.neon.ld4.v8i16.
// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
  return vld4q_u16(a);
}
11075
// vld4q_u32: q-register 4-element load of <4 x i32> via
// @llvm.aarch64.neon.ld4.v4i32.
// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
  return vld4q_u32(a);
}
11093
// vld4q_u64: q-register 4-element load of <2 x i64>; AArch64 permits ld4 on
// 64-bit element q vectors.
// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x4_t [[TMP6]]
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
  return vld4q_u64(a);
}
11111
// vld4q_s8: signed variant of the <16 x i8> ld4 lowering; identical IR to
// vld4q_u8 apart from the struct type.
// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}
11128
// vld4q_s16: signed variant of the <8 x i16> ld4 lowering.
// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}
11146
// vld4q_s32: signed variant of the <4 x i32> ld4 lowering.
// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}
11164
// vld4q_s64: signed variant of the <2 x i64> ld4 lowering.
// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}
11182
// vld4q_f16: f16 q-vector data is loaded via the <8 x i16> ld4 intrinsic
// (half handled as i16 in this lowering).
// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}
11200
// vld4q_f32: q-register 4-element load of <4 x float> via
// @llvm.aarch64.neon.ld4.v4f32.
// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}
11218
// vld4q_f64: AArch64-only q-register 4-element load of <2 x double>.
// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}
11236
// vld4q_p8: poly8 shares the <16 x i8> ld4 lowering; i8* argument needs no
// first pointer bitcast, so TMP numbering is one lower.
// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}
11253
// vld4q_p16: poly16 shares the <8 x i16> ld4 lowering with u16/s16.
// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}
11271
// vld4_u8: d-register 4-element load of <8 x i8>; 32-byte aggregate copied
// with 8-byte alignment.
// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}
11288
// vld4_u16: d-register 4-element load of <4 x i16> via
// @llvm.aarch64.neon.ld4.v4i16.
// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}
11306
// vld4_u32: d-register 4-element load of <2 x i32> via
// @llvm.aarch64.neon.ld4.v2i32.
// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}
11324
// vld4_u64: AArch64-only 4-element load of single-lane <1 x i64> vectors.
// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}
11342
// vld4_s8: signed variant of the <8 x i8> ld4 lowering; identical IR to
// vld4_u8 apart from the struct type.
// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}
11359
// vld4_s16: signed variant of the <4 x i16> ld4 lowering.
// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}
11377
// vld4_s32: signed variant of the <2 x i32> ld4 lowering.
// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}
11395
// vld4_s64: signed variant of the <1 x i64> ld4 lowering.
// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}
11413
11414 // CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 {
11415 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
11416 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
11417 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
11418 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
11419 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
11420 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
11421 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
11422 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
11423 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
11424 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
11425 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
11426 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
11427 // CHECK: ret %struct.float16x4x4_t [[TMP6]]
test_vld4_f16(float16_t const * a)11428 float16x4x4_t test_vld4_f16(float16_t const *a) {
11429 return vld4_f16(a);
11430 }
11431
11432 // CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 {
11433 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
11434 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
11435 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
11436 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
11437 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
11438 // CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
11439 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
11440 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
11441 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
11442 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
11443 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
11444 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
11445 // CHECK: ret %struct.float32x2x4_t [[TMP6]]
test_vld4_f32(float32_t const * a)11446 float32x2x4_t test_vld4_f32(float32_t const *a) {
11447 return vld4_f32(a);
11448 }
11449
11450 // CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 {
11451 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
11452 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
11453 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
11454 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
11455 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
11456 // CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
11457 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
11458 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
11459 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
11460 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
11461 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
11462 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
11463 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
test_vld4_f64(float64_t const * a)11464 float64x1x4_t test_vld4_f64(float64_t const *a) {
11465 return vld4_f64(a);
11466 }
11467
11468 // CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 {
11469 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
11470 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
11471 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
11472 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
11473 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
11474 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
11475 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
11476 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
11477 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
11478 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
11479 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
11480 // CHECK: ret %struct.poly8x8x4_t [[TMP5]]
test_vld4_p8(poly8_t const * a)11481 poly8x8x4_t test_vld4_p8(poly8_t const *a) {
11482 return vld4_p8(a);
11483 }
11484
11485 // CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 {
11486 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
11487 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
11488 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
11489 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
11490 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
11491 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
11492 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
11493 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
11494 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
11495 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
11496 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
11497 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
11498 // CHECK: ret %struct.poly16x4x4_t [[TMP6]]
test_vld4_p16(poly16_t const * a)11499 poly16x4x4_t test_vld4_p16(poly16_t const *a) {
11500 return vld4_p16(a);
11501 }
11502
11503 // CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
11504 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
11505 // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
11506 // CHECK: ret void
test_vst1q_u8(uint8_t * a,uint8x16_t b)11507 void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
11508 vst1q_u8(a, b);
11509 }
11510
11511 // CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
11512 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
11513 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
11514 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11515 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11516 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11517 // CHECK: ret void
test_vst1q_u16(uint16_t * a,uint16x8_t b)11518 void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
11519 vst1q_u16(a, b);
11520 }
11521
11522 // CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
11523 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
11524 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
11525 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
11526 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
11527 // CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
11528 // CHECK: ret void
test_vst1q_u32(uint32_t * a,uint32x4_t b)11529 void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
11530 vst1q_u32(a, b);
11531 }
11532
11533 // CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
11534 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
11535 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
11536 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
11537 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
11538 // CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
11539 // CHECK: ret void
test_vst1q_u64(uint64_t * a,uint64x2_t b)11540 void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
11541 vst1q_u64(a, b);
11542 }
11543
11544 // CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
11545 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
11546 // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
11547 // CHECK: ret void
test_vst1q_s8(int8_t * a,int8x16_t b)11548 void test_vst1q_s8(int8_t *a, int8x16_t b) {
11549 vst1q_s8(a, b);
11550 }
11551
11552 // CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
11553 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
11554 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
11555 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11556 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11557 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11558 // CHECK: ret void
test_vst1q_s16(int16_t * a,int16x8_t b)11559 void test_vst1q_s16(int16_t *a, int16x8_t b) {
11560 vst1q_s16(a, b);
11561 }
11562
11563 // CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
11564 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
11565 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
11566 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
11567 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
11568 // CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
11569 // CHECK: ret void
test_vst1q_s32(int32_t * a,int32x4_t b)11570 void test_vst1q_s32(int32_t *a, int32x4_t b) {
11571 vst1q_s32(a, b);
11572 }
11573
11574 // CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
11575 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
11576 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
11577 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
11578 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
11579 // CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
11580 // CHECK: ret void
test_vst1q_s64(int64_t * a,int64x2_t b)11581 void test_vst1q_s64(int64_t *a, int64x2_t b) {
11582 vst1q_s64(a, b);
11583 }
11584
11585 // CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
11586 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
11587 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
11588 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11589 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11590 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11591 // CHECK: ret void
test_vst1q_f16(float16_t * a,float16x8_t b)11592 void test_vst1q_f16(float16_t *a, float16x8_t b) {
11593 vst1q_f16(a, b);
11594 }
11595
11596 // CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
11597 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
11598 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
11599 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
11600 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
11601 // CHECK: store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
11602 // CHECK: ret void
test_vst1q_f32(float32_t * a,float32x4_t b)11603 void test_vst1q_f32(float32_t *a, float32x4_t b) {
11604 vst1q_f32(a, b);
11605 }
11606
11607 // CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 {
11608 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
11609 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
11610 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
11611 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
11612 // CHECK: store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
11613 // CHECK: ret void
test_vst1q_f64(float64_t * a,float64x2_t b)11614 void test_vst1q_f64(float64_t *a, float64x2_t b) {
11615 vst1q_f64(a, b);
11616 }
11617
11618 // CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
11619 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
11620 // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
11621 // CHECK: ret void
test_vst1q_p8(poly8_t * a,poly8x16_t b)11622 void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
11623 vst1q_p8(a, b);
11624 }
11625
11626 // CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
11627 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
11628 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
11629 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11630 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11631 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11632 // CHECK: ret void
test_vst1q_p16(poly16_t * a,poly16x8_t b)11633 void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
11634 vst1q_p16(a, b);
11635 }
11636
11637 // CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
11638 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11639 // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
11640 // CHECK: ret void
test_vst1_u8(uint8_t * a,uint8x8_t b)11641 void test_vst1_u8(uint8_t *a, uint8x8_t b) {
11642 vst1_u8(a, b);
11643 }
11644
11645 // CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
11646 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
11647 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11648 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11649 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11650 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11651 // CHECK: ret void
test_vst1_u16(uint16_t * a,uint16x4_t b)11652 void test_vst1_u16(uint16_t *a, uint16x4_t b) {
11653 vst1_u16(a, b);
11654 }
11655
11656 // CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
11657 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
11658 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
11659 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
11660 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
11661 // CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
11662 // CHECK: ret void
test_vst1_u32(uint32_t * a,uint32x2_t b)11663 void test_vst1_u32(uint32_t *a, uint32x2_t b) {
11664 vst1_u32(a, b);
11665 }
11666
11667 // CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
11668 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
11669 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
11670 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
11671 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
11672 // CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
11673 // CHECK: ret void
test_vst1_u64(uint64_t * a,uint64x1_t b)11674 void test_vst1_u64(uint64_t *a, uint64x1_t b) {
11675 vst1_u64(a, b);
11676 }
11677
11678 // CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
11679 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11680 // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
11681 // CHECK: ret void
test_vst1_s8(int8_t * a,int8x8_t b)11682 void test_vst1_s8(int8_t *a, int8x8_t b) {
11683 vst1_s8(a, b);
11684 }
11685
11686 // CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
11687 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
11688 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11689 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11690 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11691 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11692 // CHECK: ret void
test_vst1_s16(int16_t * a,int16x4_t b)11693 void test_vst1_s16(int16_t *a, int16x4_t b) {
11694 vst1_s16(a, b);
11695 }
11696
11697 // CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
11698 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
11699 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
11700 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
11701 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
11702 // CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
11703 // CHECK: ret void
test_vst1_s32(int32_t * a,int32x2_t b)11704 void test_vst1_s32(int32_t *a, int32x2_t b) {
11705 vst1_s32(a, b);
11706 }
11707
11708 // CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
11709 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
11710 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
11711 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
11712 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
11713 // CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
11714 // CHECK: ret void
test_vst1_s64(int64_t * a,int64x1_t b)11715 void test_vst1_s64(int64_t *a, int64x1_t b) {
11716 vst1_s64(a, b);
11717 }
11718
11719 // CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
11720 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
11721 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
11722 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11723 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11724 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11725 // CHECK: ret void
test_vst1_f16(float16_t * a,float16x4_t b)11726 void test_vst1_f16(float16_t *a, float16x4_t b) {
11727 vst1_f16(a, b);
11728 }
11729
11730 // CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
11731 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
11732 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
11733 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
11734 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
11735 // CHECK: store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
11736 // CHECK: ret void
test_vst1_f32(float32_t * a,float32x2_t b)11737 void test_vst1_f32(float32_t *a, float32x2_t b) {
11738 vst1_f32(a, b);
11739 }
11740
11741 // CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 {
11742 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
11743 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
11744 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
11745 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
11746 // CHECK: store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
11747 // CHECK: ret void
test_vst1_f64(float64_t * a,float64x1_t b)11748 void test_vst1_f64(float64_t *a, float64x1_t b) {
11749 vst1_f64(a, b);
11750 }
11751
11752 // CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
11753 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11754 // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
11755 // CHECK: ret void
test_vst1_p8(poly8_t * a,poly8x8_t b)11756 void test_vst1_p8(poly8_t *a, poly8x8_t b) {
11757 vst1_p8(a, b);
11758 }
11759
11760 // CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
11761 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
11762 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11763 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11764 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11765 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11766 // CHECK: ret void
test_vst1_p16(poly16_t * a,poly16x4_t b)11767 void test_vst1_p16(poly16_t *a, poly16x4_t b) {
11768 vst1_p16(a, b);
11769 }
11770
11771 // CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
11772 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
11773 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
11774 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
11775 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11776 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
11777 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
11778 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11779 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
11780 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11781 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11782 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
11783 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11784 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11785 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11786 // CHECK: ret void
test_vst2q_u8(uint8_t * a,uint8x16x2_t b)11787 void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
11788 vst2q_u8(a, b);
11789 }
11790
11791 // CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
11792 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
11793 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
11794 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
11795 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11796 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
11797 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
11798 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11799 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11800 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
11801 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11802 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11803 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11804 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
11805 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11806 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11807 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11808 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11809 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11810 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11811 // CHECK: ret void
test_vst2q_u16(uint16_t * a,uint16x8x2_t b)11812 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
11813 vst2q_u16(a, b);
11814 }
11815
11816 // CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
11817 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
11818 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
11819 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
11820 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
11821 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
11822 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
11823 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11824 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11825 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
11826 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
11827 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11828 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11829 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
11830 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11831 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11832 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11833 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11834 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11835 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
11836 // CHECK: ret void
test_vst2q_u32(uint32_t * a,uint32x4x2_t b)11837 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
11838 vst2q_u32(a, b);
11839 }
11840
11841 // CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
11842 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
11843 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
11844 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
11845 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11846 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
11847 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
11848 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11849 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11850 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
11851 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11852 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11853 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11854 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
11855 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11856 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11857 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11858 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11859 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11860 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11861 // CHECK: ret void
test_vst2q_u64(uint64_t * a,uint64x2x2_t b)11862 void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
11863 vst2q_u64(a, b);
11864 }
11865
11866 // CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
11867 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
11868 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
11869 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
11870 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11871 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
11872 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
11873 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11874 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
11875 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11876 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11877 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
11878 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11879 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11880 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11881 // CHECK: ret void
test_vst2q_s8(int8_t * a,int8x16x2_t b)11882 void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
11883 vst2q_s8(a, b);
11884 }
11885
11886 // CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
11887 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
11888 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
11889 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
11890 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11891 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
11892 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
11893 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11894 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11895 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
11896 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11897 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11898 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11899 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
11900 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11901 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11902 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11903 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11904 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11905 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11906 // CHECK: ret void
test_vst2q_s16(int16_t * a,int16x8x2_t b)11907 void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
11908 vst2q_s16(a, b);
11909 }
11910
// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2q_s32 lowers to @llvm.aarch64.neon.st2.v4i32 (IR pinned by the CHECK lines above).
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}
11935
// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2q_s64 lowers to @llvm.aarch64.neon.st2.v2i64 (IR pinned by the CHECK lines above).
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}
11960
// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2q_f16 lowers to @llvm.aarch64.neon.st2.v8i16 (half data is stored via the i16 form, per the CHECK lines above).
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}
11985
// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2q_f32 lowers to @llvm.aarch64.neon.st2.v4f32 (IR pinned by the CHECK lines above).
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
12010
// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2q_f64 lowers to @llvm.aarch64.neon.st2.v2f64 (IR pinned by the CHECK lines above).
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}
12035
// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
// Verifies vst2q_p8 lowers to @llvm.aarch64.neon.st2.v16i8; i8 elements need no bitcasts, so %a is passed directly.
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
12055
// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2q_p16 lowers to @llvm.aarch64.neon.st2.v8i16 (IR pinned by the CHECK lines above).
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
12080
// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
// Verifies vst2_u8 lowers to @llvm.aarch64.neon.st2.v8i8; i8 elements need no bitcasts, so %a is passed directly.
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
12100
// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_u16 lowers to @llvm.aarch64.neon.st2.v4i16 (IR pinned by the CHECK lines above).
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
12125
// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_u32 lowers to @llvm.aarch64.neon.st2.v2i32 (IR pinned by the CHECK lines above).
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
12150
// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_u64 lowers to @llvm.aarch64.neon.st2.v1i64 (IR pinned by the CHECK lines above).
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}
12175
// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
// Verifies vst2_s8 lowers to @llvm.aarch64.neon.st2.v8i8; i8 elements need no bitcasts, so %a is passed directly.
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}
12195
// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_s16 lowers to @llvm.aarch64.neon.st2.v4i16 (IR pinned by the CHECK lines above).
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}
12220
// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_s32 lowers to @llvm.aarch64.neon.st2.v2i32 (IR pinned by the CHECK lines above).
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}
12245
// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_s64 lowers to @llvm.aarch64.neon.st2.v1i64 (IR pinned by the CHECK lines above).
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}
12270
// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_f16 lowers to @llvm.aarch64.neon.st2.v4i16 (half data is stored via the i16 form, per the CHECK lines above).
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}
12295
// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst2_f32 lowers to @llvm.aarch64.neon.st2.v2f32 (IR pinned by the CHECK lines above).
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}
12320
12321 // CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
12322 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
12323 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
12324 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
12325 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
12326 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
12327 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
12328 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
12329 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
12330 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
12331 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
12332 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12333 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12334 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
12335 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
12336 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12337 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12338 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12339 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12340 // CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
12341 // CHECK: ret void
// Exercises vst2_f64; expected IR (call to @llvm.aarch64.neon.st2.v1f64) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst2_f64(float64_t * a,float64x1x2_t b)12342 void test_vst2_f64(float64_t *a, float64x1x2_t b) {
12343 vst2_f64(a, b);
12344 }
12345
12346 // CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
12347 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
12348 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
12349 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
12350 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
12351 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
12352 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
12353 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
12354 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
12355 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
12356 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12357 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
12358 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12359 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12360 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
12361 // CHECK: ret void
// Exercises vst2_p8; expected IR (call to @llvm.aarch64.neon.st2.v8i8 on the
// raw i8* pointer, no bitcasts needed) is pinned by the FileCheck lines above.
test_vst2_p8(poly8_t * a,poly8x8x2_t b)12362 void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
12363 vst2_p8(a, b);
12364 }
12365
12366 // CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
12367 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
12368 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
12369 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
12370 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
12371 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
12372 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
12373 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
12374 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12375 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
12376 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
12377 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12378 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12379 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
12380 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12381 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12382 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12383 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12384 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12385 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
12386 // CHECK: ret void
// Exercises vst2_p16; expected IR (call to @llvm.aarch64.neon.st2.v4i16) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst2_p16(poly16_t * a,poly16x4x2_t b)12387 void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
12388 vst2_p16(a, b);
12389 }
12390
12391 // CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
12392 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
12393 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
12394 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
12395 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
12396 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
12397 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
12398 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12399 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
12400 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
12401 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
12403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12404 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12405 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
12406 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12407 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12408 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
12409 // CHECK: ret void
// Exercises vst3q_u8; expected IR (call to @llvm.aarch64.neon.st3.v16i8 on the
// raw i8* pointer) is pinned by the FileCheck lines above.
test_vst3q_u8(uint8_t * a,uint8x16x3_t b)12410 void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
12411 vst3q_u8(a, b);
12412 }
12413
12414 // CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
12415 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
12416 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
12417 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
12418 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
12419 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
12420 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
12421 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12422 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12423 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
12424 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
12425 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12426 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12427 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
12428 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12429 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12430 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12431 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
12432 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12433 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12434 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12435 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12436 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12437 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12438 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12439 // CHECK: ret void
// Exercises vst3q_u16; expected IR (call to @llvm.aarch64.neon.st3.v8i16) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst3q_u16(uint16_t * a,uint16x8x3_t b)12440 void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
12441 vst3q_u16(a, b);
12442 }
12443
12444 // CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
12445 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
12446 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
12447 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
12448 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
12449 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
12450 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
12451 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12452 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12453 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
12454 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
12455 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12456 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12457 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
12458 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12459 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12460 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12461 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
12462 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12463 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12464 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12465 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12466 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12467 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12468 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
12469 // CHECK: ret void
// Exercises vst3q_u32; expected IR (call to @llvm.aarch64.neon.st3.v4i32) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst3q_u32(uint32_t * a,uint32x4x3_t b)12470 void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
12471 vst3q_u32(a, b);
12472 }
12473
12474 // CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
12475 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
12476 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
12477 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
12478 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
12479 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
12480 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
12481 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12482 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12483 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
12484 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
12485 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12486 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12487 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
12488 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12489 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12490 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12491 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
12492 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12493 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12494 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12495 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12496 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12497 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12498 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
12499 // CHECK: ret void
// Exercises vst3q_u64; expected IR (call to @llvm.aarch64.neon.st3.v2i64) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst3q_u64(uint64_t * a,uint64x2x3_t b)12500 void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
12501 vst3q_u64(a, b);
12502 }
12503
12504 // CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
12505 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
12506 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
12507 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
12508 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
12509 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
12510 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
12511 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12512 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
12513 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
12514 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12515 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
12516 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12517 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12518 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
12519 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12520 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12521 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
12522 // CHECK: ret void
// Exercises vst3q_s8; same lowering as the u8 variant — the FileCheck lines
// above pin a call to @llvm.aarch64.neon.st3.v16i8.
test_vst3q_s8(int8_t * a,int8x16x3_t b)12523 void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
12524 vst3q_s8(a, b);
12525 }
12526
12527 // CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
12528 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
12529 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
12530 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
12531 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
12532 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
12533 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
12534 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12535 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12536 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
12537 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
12538 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12539 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12540 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
12541 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12542 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12543 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12544 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
12545 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12546 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12547 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12548 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12549 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12550 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12551 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12552 // CHECK: ret void
// Exercises vst3q_s16; same lowering as the u16 variant — the FileCheck lines
// above pin a call to @llvm.aarch64.neon.st3.v8i16.
test_vst3q_s16(int16_t * a,int16x8x3_t b)12553 void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
12554 vst3q_s16(a, b);
12555 }
12556
12557 // CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
12558 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
12559 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
12560 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
12561 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
12562 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
12563 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
12564 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12565 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12566 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
12567 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
12568 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12569 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12570 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
12571 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12572 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12573 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12574 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
12575 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12576 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12577 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12578 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12579 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12580 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12581 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
12582 // CHECK: ret void
// Exercises vst3q_s32; same lowering as the u32 variant — the FileCheck lines
// above pin a call to @llvm.aarch64.neon.st3.v4i32.
test_vst3q_s32(int32_t * a,int32x4x3_t b)12583 void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
12584 vst3q_s32(a, b);
12585 }
12586
12587 // CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
12588 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
12589 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
12590 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
12591 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
12592 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
12593 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
12594 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12595 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12596 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
12597 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
12598 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12599 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12600 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
12601 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12602 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12603 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12604 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
12605 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12606 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12607 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12608 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12609 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12610 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12611 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
12612 // CHECK: ret void
// Exercises vst3q_s64; same lowering as the u64 variant — the FileCheck lines
// above pin a call to @llvm.aarch64.neon.st3.v2i64.
test_vst3q_s64(int64_t * a,int64x2x3_t b)12613 void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
12614 vst3q_s64(a, b);
12615 }
12616
12617 // CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
12618 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
12619 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
12620 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
12621 // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
12622 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
12623 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
12624 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12625 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
12626 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
12627 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
12628 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12629 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12630 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
12631 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
12632 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12633 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12634 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
12635 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
12636 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12637 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12638 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12639 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12640 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12641 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12642 // CHECK: ret void
// Exercises vst3q_f16; per the FileCheck lines above, the half vectors are
// bitcast and stored through @llvm.aarch64.neon.st3.v8i16 (i16 element form).
test_vst3q_f16(float16_t * a,float16x8x3_t b)12643 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
12644 vst3q_f16(a, b);
12645 }
12646
12647 // CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
12648 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
12649 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
12650 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
12651 // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
12652 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
12653 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
12654 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12655 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
12656 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
12657 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
12658 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12659 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12660 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
12661 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
12662 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12663 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12664 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
12665 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
12666 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12667 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12668 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12669 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12670 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12671 // CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
12672 // CHECK: ret void
// Exercises vst3q_f32; expected IR (call to @llvm.aarch64.neon.st3.v4f32) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst3q_f32(float32_t * a,float32x4x3_t b)12673 void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
12674 vst3q_f32(a, b);
12675 }
12676
12677 // CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
12678 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
12679 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
12680 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
12681 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
12682 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
12683 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
12684 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12685 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
12686 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
12687 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
12688 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12689 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
12691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
12692 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12693 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12694 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
12695 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
12696 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12697 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12698 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12699 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12700 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12701 // CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
12702 // CHECK: ret void
// Exercises vst3q_f64; expected IR (call to @llvm.aarch64.neon.st3.v2f64) is
// pinned by the FileCheck lines above — keep the body in sync with them.
test_vst3q_f64(float64_t * a,float64x2x3_t b)12703 void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
12704 vst3q_f64(a, b);
12705 }
12706
12707 // CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
12708 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
12709 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
12710 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
12711 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
12712 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
12713 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
12714 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12715 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
12716 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
12717 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12718 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
12719 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12720 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12721 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
12722 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12723 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12724 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
12725 // CHECK: ret void
// Codegen test: vst3q_p8 must lower to @llvm.aarch64.neon.st3.v16i8 (no
// intermediate bitcasts for the i8 element type), per the CHECK lines above.
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}
12729
12730 // CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
12731 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
12732 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
12733 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
12734 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
12735 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
12736 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
12737 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12738 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12739 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
12740 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
12741 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12742 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12743 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
12744 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12745 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12746 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12747 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
12748 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12749 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12750 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12751 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12752 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12753 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12754 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12755 // CHECK: ret void
// Codegen test: vst3q_p16 must lower to @llvm.aarch64.neon.st3.v8i16 with the
// <16 x i8> round-trip bitcasts checked above. Body shape is intentional.
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}
12759
12760 // CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
12761 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
12762 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
12763 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
12764 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
12765 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
12766 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
12767 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12768 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
12769 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
12770 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12771 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
12772 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12773 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12774 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
12775 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12776 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12777 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
12778 // CHECK: ret void
// Codegen test: vst3_u8 must lower to @llvm.aarch64.neon.st3.v8i8 taking the
// raw i8* argument directly, per the CHECK lines above.
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}
12782
12783 // CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
12784 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
12785 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
12786 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
12787 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
12788 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
12789 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
12790 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12791 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12792 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
12793 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
12794 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12795 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12796 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
12797 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12798 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12799 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12800 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
12801 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12802 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12803 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12804 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12805 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12806 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12807 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12808 // CHECK: ret void
// Codegen test: vst3_u16 must lower to @llvm.aarch64.neon.st3.v4i16 with the
// i16*-to-i8* pointer bitcast checked above.
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}
12812
12813 // CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
12814 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
12815 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
12816 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
12817 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
12818 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
12819 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
12820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12821 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12822 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
12823 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
12824 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12825 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12826 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
12827 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12828 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12829 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12830 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
12831 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12832 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12833 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12834 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12835 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12836 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12837 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
12838 // CHECK: ret void
// Codegen test: vst3_u32 must lower to @llvm.aarch64.neon.st3.v2i32, per the
// CHECK lines above.
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}
12842
12843 // CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
12844 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
12845 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
12846 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
12847 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12848 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
12849 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
12850 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12851 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12852 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
12853 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12854 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12855 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12856 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
12857 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12858 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12859 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12860 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
12861 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12862 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12863 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12864 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12865 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12866 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12867 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12868 // CHECK: ret void
// Codegen test: vst3_u64 must lower to @llvm.aarch64.neon.st3.v1i64, per the
// CHECK lines above.
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}
12872
12873 // CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
12874 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
12875 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
12876 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
12877 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
12878 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
12879 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
12880 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12881 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
12882 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
12883 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
12885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12886 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12887 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
12888 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12889 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12890 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
12891 // CHECK: ret void
// Codegen test: vst3_s8 must lower to @llvm.aarch64.neon.st3.v8i8 (same
// lowering as the unsigned variant), per the CHECK lines above.
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}
12895
12896 // CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
12897 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
12898 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
12899 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
12900 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
12901 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
12902 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
12903 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12904 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12905 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
12906 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
12907 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12908 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12909 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
12910 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12911 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12912 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12913 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
12914 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12915 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12916 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12917 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12918 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12919 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12920 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12921 // CHECK: ret void
// Codegen test: vst3_s16 must lower to @llvm.aarch64.neon.st3.v4i16, per the
// CHECK lines above.
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}
12925
12926 // CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
12927 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
12928 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
12929 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
12930 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
12931 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
12932 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
12933 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12934 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12935 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12936 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
12937 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12938 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12939 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12940 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12941 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12942 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12943 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12944 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12945 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12946 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12947 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12948 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12949 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12950 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
12951 // CHECK: ret void
// Codegen test: vst3_s32 must lower to @llvm.aarch64.neon.st3.v2i32, per the
// CHECK lines above.
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}
12955
12956 // CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
12957 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
12958 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
12959 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
12960 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12961 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
12962 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
12963 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12964 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12965 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12966 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12967 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12968 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12969 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12970 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12971 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12972 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12973 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12974 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12975 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12976 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12977 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12978 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12979 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12980 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12981 // CHECK: ret void
// Codegen test: vst3_s64 must lower to @llvm.aarch64.neon.st3.v1i64, per the
// CHECK lines above.
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}
12985
12986 // CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
12987 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
12988 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
12989 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
12990 // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
12991 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
12992 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
12993 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12994 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
12995 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12996 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
12997 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12998 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12999 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
13000 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
13001 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
13002 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
13003 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
13004 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
13005 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
13006 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
13007 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13008 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13009 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13010 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
13011 // CHECK: ret void
// Codegen test: vst3_f16 must lower to @llvm.aarch64.neon.st3.v4i16 — the
// half vectors are bitcast to <4 x i16> for the intrinsic, per the CHECK
// lines above.
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}
13015
13016 // CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
13017 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
13018 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
13019 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
13020 // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
13021 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
13022 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
13023 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13024 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
13025 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
13026 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
13027 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
13028 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
13029 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
13030 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
13031 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
13032 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
13033 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
13034 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
13035 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
13036 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
13037 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
13038 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
13039 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
13040 // CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
13041 // CHECK: ret void
// Codegen test: vst3_f32 must lower to @llvm.aarch64.neon.st3.v2f32, per the
// CHECK lines above.
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}
13045
13046 // CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
13047 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
13048 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
13049 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
13050 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
13051 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
13052 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
13053 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13054 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13055 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13056 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
13057 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13058 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13059 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13060 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
13061 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13062 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13063 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13064 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
13065 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13066 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13067 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13068 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13069 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13070 // CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
13071 // CHECK: ret void
// Codegen test: vst3_f64 (64-bit D-register variant) must lower to
// @llvm.aarch64.neon.st3.v1f64, per the CHECK lines above.
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}
13075
13076 // CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
13077 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
13078 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
13079 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
13080 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
13081 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
13082 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
13083 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13084 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
13085 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
13086 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13087 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
13088 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13089 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13090 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
13091 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13092 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13093 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
13094 // CHECK: ret void
// Codegen test: vst3_p8 must lower to @llvm.aarch64.neon.st3.v8i8 taking the
// i8* argument directly, per the CHECK lines above.
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
13098
13099 // CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
13100 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
13101 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
13102 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
13103 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
13104 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
13105 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
13106 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13107 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13108 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
13109 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
13110 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13111 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13112 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
13113 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13114 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13115 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13116 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
13117 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13118 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13119 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13120 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13121 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13122 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13123 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
13124 // CHECK: ret void
// Checks that vst3_p16 stores a poly16x4x3_t through the
// llvm.aarch64.neon.st3.v4i16 intrinsic (see CHECK lines above).
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
13128
13129 // CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
13130 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
13131 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
13132 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
13133 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
13134 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
13135 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
13136 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13137 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13138 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
13139 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
13140 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13141 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
13142 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
13143 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13144 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
13145 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
13146 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13147 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
13148 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
13149 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
13150 // CHECK: ret void
// Checks that vst4q_u8 stores a uint8x16x4_t through the
// llvm.aarch64.neon.st4.v16i8 intrinsic (see CHECK lines above).
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
13154
13155 // CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
13156 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
13157 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
13158 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
13159 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
13160 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
13161 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
13162 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13163 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13164 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13165 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
13166 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
13167 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
13168 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13169 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
13170 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
13171 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
13172 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13173 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
13174 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
13175 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
13176 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13177 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
13178 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
13179 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
13180 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13181 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13182 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13183 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13184 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13185 // CHECK: ret void
// Checks that vst4q_u16 stores a uint16x8x4_t through the
// llvm.aarch64.neon.st4.v8i16 intrinsic (see CHECK lines above).
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
13189
13190 // CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
13191 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
13192 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
13193 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
13194 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
13195 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
13196 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
13197 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13198 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
13199 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13200 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
13201 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
13202 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
13203 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13204 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
13205 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
13206 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
13207 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13208 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
13209 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
13210 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
13211 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13212 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
13213 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
13214 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
13215 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
13216 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
13217 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
13218 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
13219 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
13220 // CHECK: ret void
// Checks that vst4q_u32 stores a uint32x4x4_t through the
// llvm.aarch64.neon.st4.v4i32 intrinsic (see CHECK lines above).
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}
13224
13225 // CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
13226 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
13227 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
13228 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
13229 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13230 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
13231 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
13232 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13233 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13234 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13235 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13236 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13237 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13238 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13239 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13240 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13241 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13242 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13243 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13244 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13245 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13246 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13247 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13248 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13249 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13250 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13251 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13252 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13253 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13254 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
13255 // CHECK: ret void
// Checks that vst4q_u64 stores a uint64x2x4_t through the
// llvm.aarch64.neon.st4.v2i64 intrinsic (see CHECK lines above).
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
13259
13260 // CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
13261 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
13262 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
13263 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
13264 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
13265 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
13266 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
13267 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13268 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13269 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
13270 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
13271 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13272 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
13273 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
13274 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13275 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
13276 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
13277 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13278 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
13279 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
13280 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
13281 // CHECK: ret void
// Checks that vst4q_s8 stores an int8x16x4_t through the
// llvm.aarch64.neon.st4.v16i8 intrinsic (see CHECK lines above).
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}
13285
13286 // CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
13287 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
13288 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
13289 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
13290 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
13291 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
13292 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
13293 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13294 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13295 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13296 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
13297 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
13298 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
13299 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13300 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
13301 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
13302 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
13303 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13304 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
13305 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
13306 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
13307 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13308 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
13309 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
13310 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
13311 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13312 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13313 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13314 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13315 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13316 // CHECK: ret void
// Checks that vst4q_s16 stores an int16x8x4_t through the
// llvm.aarch64.neon.st4.v8i16 intrinsic (see CHECK lines above).
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}
13320
13321 // CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
13322 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
13323 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
13324 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
13325 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
13326 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
13327 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
13328 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13329 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
13330 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13331 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
13332 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
13333 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
13334 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13335 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
13336 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
13337 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
13338 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13339 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
13340 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
13341 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
13342 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13343 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
13344 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
13345 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
13346 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
13347 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
13348 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
13349 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
13350 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
13351 // CHECK: ret void
// Checks that vst4q_s32 stores an int32x4x4_t through the
// llvm.aarch64.neon.st4.v4i32 intrinsic (see CHECK lines above).
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}
13355
13356 // CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
13357 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
13358 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
13359 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
13360 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13361 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
13362 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
13363 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13364 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13365 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13366 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13367 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13368 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13369 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13370 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13371 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13372 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13373 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13374 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13375 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13376 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13377 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13378 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13379 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13380 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13381 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13382 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13383 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13384 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13385 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
13386 // CHECK: ret void
// Checks that vst4q_s64 stores an int64x2x4_t through the
// llvm.aarch64.neon.st4.v2i64 intrinsic (see CHECK lines above).
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
13390
13391 // CHECK-LABEL: define void @test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
13392 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
13393 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
13394 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
13395 // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
13396 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
13397 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
13398 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13399 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
13400 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13401 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
13402 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
13403 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
13404 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13405 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
13406 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
13407 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
13408 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13409 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
13410 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
13411 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
13412 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13413 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
13414 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
13415 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
13416 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13417 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13418 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13419 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13420 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13421 // CHECK: ret void
// Checks that vst4q_f16 stores a float16x8x4_t through the
// llvm.aarch64.neon.st4.v8i16 intrinsic; the CHECK lines above show the
// <8 x half> values are bitcast to <8 x i16> before the call.
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}
13425
13426 // CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
13427 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
13428 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
13429 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
13430 // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
13431 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
13432 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
13433 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13434 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
13435 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13436 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
13437 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
13438 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
13439 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13440 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
13441 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
13442 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
13443 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13444 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
13445 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
13446 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
13447 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13448 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
13449 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
13450 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
13451 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
13452 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
13453 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
13454 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
13455 // CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
13456 // CHECK: ret void
// Checks that vst4q_f32 stores a float32x4x4_t through the
// llvm.aarch64.neon.st4.v4f32 intrinsic (see CHECK lines above).
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}
13460
13461 // CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
13462 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
13463 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
13464 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
13465 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
13466 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
13467 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
13468 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13469 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13470 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13471 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
13472 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13473 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13474 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13475 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
13476 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13477 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13478 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13479 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
13480 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13481 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13482 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13483 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
13484 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
13485 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
13486 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13487 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13488 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13489 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
13490 // CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
13491 // CHECK: ret void
// Checks that vst4q_f64 stores a float64x2x4_t through the
// llvm.aarch64.neon.st4.v2f64 intrinsic (see CHECK lines above).
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}
13495
13496 // CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
13497 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
13498 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
13499 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
13500 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
13501 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
13502 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
13503 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13504 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13505 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
13506 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
13507 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13508 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
13509 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
13510 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13511 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
13512 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
13513 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13514 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
13515 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
13516 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
13517 // CHECK: ret void
test_vst4q_p8(poly8_t * a,poly8x16x4_t b)13518 void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
13519 vst4q_p8(a, b);
13520 }
13521
13522 // CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
13523 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
13524 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
13525 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
13526 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
13527 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
13528 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
13529 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13530 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13531 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13532 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
13533 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
13534 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
13535 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13536 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
13537 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
13538 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
13539 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13540 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
13541 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
13542 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
13543 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13544 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
13545 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
13546 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
13547 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13548 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13549 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13550 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13551 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13552 // CHECK: ret void
test_vst4q_p16(poly16_t * a,poly16x8x4_t b)13553 void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
13554 vst4q_p16(a, b);
13555 }
13556
13557 // CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13558 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
13559 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
13560 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
13561 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13562 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
13563 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
13564 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13565 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13566 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13567 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13568 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13569 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13570 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13571 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13572 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13573 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13574 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13575 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13576 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13577 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13578 // CHECK: ret void
test_vst4_u8(uint8_t * a,uint8x8x4_t b)13579 void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
13580 vst4_u8(a, b);
13581 }
13582
13583 // CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13584 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
13585 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
13586 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
13587 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13588 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
13589 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
13590 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13591 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13592 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13593 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13594 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13595 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13596 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13597 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13598 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13599 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13600 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13601 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13602 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13603 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13604 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13605 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13606 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13607 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13608 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13609 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13610 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13611 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13612 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13613 // CHECK: ret void
test_vst4_u16(uint16_t * a,uint16x4x4_t b)13614 void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
13615 vst4_u16(a, b);
13616 }
13617
13618 // CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
13619 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
13620 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
13621 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
13622 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
13623 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
13624 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
13625 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13626 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
13627 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13628 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
13629 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
13630 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
13631 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13632 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
13633 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
13634 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
13635 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13636 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
13637 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
13638 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
13639 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13640 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
13641 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
13642 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
13643 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
13644 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
13645 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
13646 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
13647 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
13648 // CHECK: ret void
test_vst4_u32(uint32_t * a,uint32x2x4_t b)13649 void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
13650 vst4_u32(a, b);
13651 }
13652
13653 // CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
13654 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
13655 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
13656 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
13657 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13658 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
13659 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
13660 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13661 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13662 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13663 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13664 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13665 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13666 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13667 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13668 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13669 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13670 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13671 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13672 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13673 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13674 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13675 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13676 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13677 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13678 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13679 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13680 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13681 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13682 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
13683 // CHECK: ret void
test_vst4_u64(uint64_t * a,uint64x1x4_t b)13684 void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
13685 vst4_u64(a, b);
13686 }
13687
13688 // CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13689 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
13690 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
13691 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
13692 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13693 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
13694 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
13695 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13696 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13697 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13698 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13699 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13700 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13701 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13702 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13703 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13704 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13705 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13706 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13707 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13708 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13709 // CHECK: ret void
test_vst4_s8(int8_t * a,int8x8x4_t b)13710 void test_vst4_s8(int8_t *a, int8x8x4_t b) {
13711 vst4_s8(a, b);
13712 }
13713
13714 // CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13715 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
13716 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
13717 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
13718 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13719 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
13720 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
13721 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13722 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13723 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13724 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13725 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13726 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13727 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13728 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13729 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13730 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13731 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13732 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13733 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13734 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13735 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13736 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13737 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13738 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13739 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13740 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13741 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13742 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13743 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13744 // CHECK: ret void
test_vst4_s16(int16_t * a,int16x4x4_t b)13745 void test_vst4_s16(int16_t *a, int16x4x4_t b) {
13746 vst4_s16(a, b);
13747 }
13748
13749 // CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
13750 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
13751 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
13752 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
13753 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
13754 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
13755 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
13756 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13757 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
13758 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13759 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
13760 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
13761 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
13762 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13763 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
13764 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
13765 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
13766 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13767 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
13768 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
13769 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
13770 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13771 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
13772 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
13773 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
13774 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
13775 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
13776 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
13777 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
13778 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
13779 // CHECK: ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)13780 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
13781 vst4_s32(a, b);
13782 }
13783
13784 // CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
13785 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
13786 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
13787 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
13788 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13789 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
13790 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
13791 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13792 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13793 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13794 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13795 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13796 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13797 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13798 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13799 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13800 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13801 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13802 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13803 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13804 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13805 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13806 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13807 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13808 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13809 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13810 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13811 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13812 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13813 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
13814 // CHECK: ret void
test_vst4_s64(int64_t * a,int64x1x4_t b)13815 void test_vst4_s64(int64_t *a, int64x1x4_t b) {
13816 vst4_s64(a, b);
13817 }
13818
13819 // CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
13820 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
13821 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
13822 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
13823 // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
13824 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
13825 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
13826 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13827 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
13828 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13829 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
13830 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
13831 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
13832 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13833 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
13834 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
13835 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
13836 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13837 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
13838 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
13839 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
13840 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13841 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
13842 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
13843 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
13844 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13845 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13846 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13847 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13848 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13849 // CHECK: ret void
test_vst4_f16(float16_t * a,float16x4x4_t b)13850 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
13851 vst4_f16(a, b);
13852 }
13853
13854 // CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
13855 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
13856 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
13857 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
13858 // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
13859 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
13860 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
13861 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13862 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
13863 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13864 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
13865 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
13866 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
13867 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13868 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
13869 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
13870 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
13871 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13872 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
13873 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
13874 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
13875 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13876 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
13877 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
13878 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
13879 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
13880 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
13881 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
13882 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
13883 // CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
13884 // CHECK: ret void
test_vst4_f32(float32_t * a,float32x2x4_t b)13885 void test_vst4_f32(float32_t *a, float32x2x4_t b) {
13886 vst4_f32(a, b);
13887 }
13888
13889 // CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
13890 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
13891 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
13892 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
13893 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
13894 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
13895 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
13896 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13897 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13898 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13899 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
13900 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13901 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13902 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13903 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
13904 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13905 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13906 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13907 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
13908 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13909 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13910 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13911 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
13912 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
13913 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
13914 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13915 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13916 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13917 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
13918 // CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
13919 // CHECK: ret void
test_vst4_f64(float64_t * a,float64x1x4_t b)13920 void test_vst4_f64(float64_t *a, float64x1x4_t b) {
13921 vst4_f64(a, b);
13922 }
13923
13924 // CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13925 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
13926 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
13927 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
13928 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13929 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
13930 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
13931 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13932 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13933 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13934 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13937 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13938 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13939 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13940 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13941 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13942 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13943 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13944 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13945 // CHECK: ret void
test_vst4_p8(poly8_t * a,poly8x8x4_t b)13946 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
13947 vst4_p8(a, b);
13948 }
13949
13950 // CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13951 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13952 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13953 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13954 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13955 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13956 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13957 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13958 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13959 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13960 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13961 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13962 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13963 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13964 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13965 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13966 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13967 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13968 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13969 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13970 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13971 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13972 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13973 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13974 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13975 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13976 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13977 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13978 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13979 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13980 // CHECK: ret void
test_vst4_p16(poly16_t * a,poly16x4x4_t b)13981 void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
13982 vst4_p16(a, b);
13983 }
13984
13985 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 {
13986 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
13987 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
13988 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
13989 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
13990 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
13991 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
13992 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
13993 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
13994 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
13995 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
13996 // CHECK: ret %struct.uint8x16x2_t [[TMP4]]
test_vld1q_u8_x2(uint8_t const * a)13997 uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
13998 return vld1q_u8_x2(a);
13999 }
14000
14001 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 {
14002 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
14003 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
14004 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
14005 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14006 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14007 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14008 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14009 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14010 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
14011 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
14012 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14013 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
14014 // CHECK: ret %struct.uint16x8x2_t [[TMP6]]
test_vld1q_u16_x2(uint16_t const * a)14015 uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
14016 return vld1q_u16_x2(a);
14017 }
14018
14019 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 {
14020 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
14021 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
14022 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
14023 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
14024 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14025 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
14026 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
14027 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
14028 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
14029 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
14030 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14031 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
14032 // CHECK: ret %struct.uint32x4x2_t [[TMP6]]
test_vld1q_u32_x2(uint32_t const * a)14033 uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
14034 return vld1q_u32_x2(a);
14035 }
14036
14037 // CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 {
14038 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
14039 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
14040 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
14041 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14042 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14043 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
14044 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
14045 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
14046 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
14047 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
14048 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14049 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
14050 // CHECK: ret %struct.uint64x2x2_t [[TMP6]]
test_vld1q_u64_x2(uint64_t const * a)14051 uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
14052 return vld1q_u64_x2(a);
14053 }
14054
14055 // CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 {
14056 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
14057 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
14058 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
14059 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
14060 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
14061 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
14062 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
14063 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
14064 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
14065 // CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
14066 // CHECK: ret %struct.int8x16x2_t [[TMP4]]
test_vld1q_s8_x2(int8_t const * a)14067 int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
14068 return vld1q_s8_x2(a);
14069 }
14070
14071 // CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 {
14072 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
14073 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
14074 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
14075 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14076 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14077 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14078 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14079 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14080 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
14081 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
14082 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14083 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
14084 // CHECK: ret %struct.int16x8x2_t [[TMP6]]
test_vld1q_s16_x2(int16_t const * a)14085 int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
14086 return vld1q_s16_x2(a);
14087 }
14088
14089 // CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 {
14090 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
14091 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
14092 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
14093 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
14094 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14095 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
14096 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
14097 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
14098 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
14099 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
14100 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14101 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
14102 // CHECK: ret %struct.int32x4x2_t [[TMP6]]
test_vld1q_s32_x2(int32_t const * a)14103 int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
14104 return vld1q_s32_x2(a);
14105 }
14106
14107 // CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 {
14108 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
14109 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
14110 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
14111 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14112 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14113 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
14114 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
14115 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
14116 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
14117 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
14118 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14119 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
14120 // CHECK: ret %struct.int64x2x2_t [[TMP6]]
test_vld1q_s64_x2(int64_t const * a)14121 int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
14122 return vld1q_s64_x2(a);
14123 }
14124
14125 // CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 {
14126 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
14127 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
14128 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
14129 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
14130 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14131 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14132 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14133 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14134 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
14135 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
14136 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14137 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
14138 // CHECK: ret %struct.float16x8x2_t [[TMP6]]
test_vld1q_f16_x2(float16_t const * a)14139 float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
14140 return vld1q_f16_x2(a);
14141 }
14142
14143 // CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 {
14144 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
14145 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
14146 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
14147 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
14148 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
14149 // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* [[TMP2]])
14150 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
14151 // CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
14152 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
14153 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
14154 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14155 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
14156 // CHECK: ret %struct.float32x4x2_t [[TMP6]]
test_vld1q_f32_x2(float32_t const * a)14157 float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
14158 return vld1q_f32_x2(a);
14159 }
14160
14161 // CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
14162 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
14163 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
14164 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
14165 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
14166 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
14167 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
14168 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
14169 // CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
14170 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
14171 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
14172 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14173 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
14174 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
test_vld1q_f64_x2(float64_t const * a)14175 float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
14176 return vld1q_f64_x2(a);
14177 }
14178
14179 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
14180 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
14181 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
14182 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
14183 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
14184 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
14185 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
14186 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
14187 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
14188 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
14189 // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
14190 // CHECK: ret %struct.poly8x16x2_t [[TMP4]]
test_vld1q_p8_x2(poly8_t const * a)14191 poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
14192 return vld1q_p8_x2(a);
14193 }
14194
14195 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
14196 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
14197 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
14198 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
14199 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14200 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14201 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14202 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14203 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14204 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
14205 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
14206 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14207 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
14208 // CHECK: ret %struct.poly16x8x2_t [[TMP6]]
test_vld1q_p16_x2(poly16_t const * a)14209 poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
14210 return vld1q_p16_x2(a);
14211 }
14212
14213 // CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
14214 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
14215 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
14216 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
14217 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14218 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14219 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
14220 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
14221 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
14222 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
14223 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
14224 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14225 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
14226 // CHECK: ret %struct.poly64x2x2_t [[TMP6]]
test_vld1q_p64_x2(poly64_t const * a)14227 poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
14228 return vld1q_p64_x2(a);
14229 }
14230
14231 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
14232 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
14233 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
14234 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
14235 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
14236 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
14237 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
14238 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
14239 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
14240 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
14241 // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
14242 // CHECK: ret %struct.uint8x8x2_t [[TMP4]]
test_vld1_u8_x2(uint8_t const * a)14243 uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
14244 return vld1_u8_x2(a);
14245 }
14246
14247 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
14248 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
14249 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
14250 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
14251 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14252 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14253 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
14254 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
14255 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
14256 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
14257 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
14258 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
14259 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
14260 // CHECK: ret %struct.uint16x4x2_t [[TMP6]]
test_vld1_u16_x2(uint16_t const * a)14261 uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
14262 return vld1_u16_x2(a);
14263 }
14264
14265 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
14266 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
14267 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
14268 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
14269 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
14270 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14271 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
14272 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
14273 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
14274 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
14275 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
14276 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
14277 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
14278 // CHECK: ret %struct.uint32x2x2_t [[TMP6]]
test_vld1_u32_x2(uint32_t const * a)14279 uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
14280 return vld1_u32_x2(a);
14281 }
14282
14283 // CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
14284 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
14285 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
14286 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
14287 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14288 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14289 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
14290 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
14291 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
14292 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
14293 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
14294 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
14295 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
14296 // CHECK: ret %struct.uint64x1x2_t [[TMP6]]
test_vld1_u64_x2(uint64_t const * a)14297 uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
14298 return vld1_u64_x2(a);
14299 }
14300
// vld1_s8_x2: i8 pointer needs no element-type bitcast, so the IR calls
// @llvm.aarch64.neon.ld1x2.v8i8 on %a directly (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}
14316
// vld1_s16_x2: two <4 x i16> vectors loaded via @llvm.aarch64.neon.ld1x2.v4i16
// (auto-generated CHECK lines — keep in sync with CodeGen, do not hand-edit).
// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}
14334
// vld1_s32_x2: two <2 x i32> vectors loaded via @llvm.aarch64.neon.ld1x2.v2i32
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}
14352
// vld1_s64_x2: signed variant of the i64 two-register load; same
// @llvm.aarch64.neon.ld1x2.v1i64 lowering as the u64 test above
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}
14370
// vld1_f16_x2: note the half* pointer is bitcast to i16* and the load uses the
// integer intrinsic ld1x2.v4i16 — fp16 data is carried as i16 at the IR level
// here (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}
14388
// vld1_f32_x2: two <2 x float> vectors via @llvm.aarch64.neon.ld1x2.v2f32
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}
14406
// vld1_f64_x2: AArch64-only f64 variant; two <1 x double> vectors via
// @llvm.aarch64.neon.ld1x2.v1f64 (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}
14424
// vld1_p8_x2: polynomial 8-bit variant; lowers identically to the s8/u8 case
// (@llvm.aarch64.neon.ld1x2.v8i8, no pointer bitcast needed) — auto-generated
// CHECK lines.
// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}
14440
// vld1_p16_x2: polynomial 16-bit variant; same ld1x2.v4i16 lowering as the
// s16/u16 case (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}
14458
// vld1_p64_x2: polynomial 64-bit variant; same ld1x2.v1i64 lowering as the
// s64/u64 cases (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}
14476
// vld1q_u8_x3: first of the q-register three-register tests; three <16 x i8>
// vectors via @llvm.aarch64.neon.ld1x3.v16i8, 48-byte/16-aligned result copy
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x3_t [[TMP4]]
uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
  return vld1q_u8_x3(a);
}
14492
// vld1q_u16_x3: three <8 x i16> vectors via @llvm.aarch64.neon.ld1x3.v8i16
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
  return vld1q_u16_x3(a);
}
14510
// vld1q_u32_x3: three <4 x i32> vectors via @llvm.aarch64.neon.ld1x3.v4i32
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
  return vld1q_u32_x3(a);
}
14528
// vld1q_u64_x3: three <2 x i64> vectors via @llvm.aarch64.neon.ld1x3.v2i64
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
  return vld1q_u64_x3(a);
}
14546
// vld1q_s8_x3: signed 8-bit q-register triple load; identical lowering to the
// u8 variant (ld1x3.v16i8, no pointer bitcast) — auto-generated CHECK lines.
// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x3_t [[TMP4]]
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
  return vld1q_s8_x3(a);
}
14562
// vld1q_s16_x3: three <8 x i16> vectors via @llvm.aarch64.neon.ld1x3.v8i16
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
  return vld1q_s16_x3(a);
}
14580
// vld1q_s32_x3: three <4 x i32> vectors via @llvm.aarch64.neon.ld1x3.v4i32
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
  return vld1q_s32_x3(a);
}
14598
// vld1q_s64_x3: signed variant of the <2 x i64> triple load; same
// ld1x3.v2i64 lowering as vld1q_u64_x3 (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
  return vld1q_s64_x3(a);
}
14616
// vld1q_f16_x3: as with vld1_f16_x2, the half* is bitcast to i16* and the
// integer ld1x3.v8i16 intrinsic carries the fp16 payload (auto-generated
// CHECK lines).
// CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
  return vld1q_f16_x3(a);
}
14634
// vld1q_f32_x3: three <4 x float> vectors via @llvm.aarch64.neon.ld1x3.v4f32
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
  return vld1q_f32_x3(a);
}
14652
// vld1q_f64_x3: three <2 x double> vectors via @llvm.aarch64.neon.ld1x3.v2f64
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}
14670
// vld1q_p8_x3: polynomial 8-bit q-register triple load; identical lowering
// to the s8/u8 variants (ld1x3.v16i8) — auto-generated CHECK lines.
// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x3_t [[TMP4]]
poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
  return vld1q_p8_x3(a);
}
14686
// vld1q_p16_x3: polynomial 16-bit q-register triple load; same ld1x3.v8i16
// lowering as the s16/u16 variants (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
  return vld1q_p16_x3(a);
}
14704
// vld1q_p64_x3: polynomial 64-bit q-register triple load; same ld1x3.v2i64
// lowering as the s64/u64 variants (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}
14722
// vld1_u8_x3: d-register triple load — three <8 x i8> vectors via
// @llvm.aarch64.neon.ld1x3.v8i8; note the 24-byte/8-aligned result copy
// vs. 48/16 for the q forms (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x3_t [[TMP4]]
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
  return vld1_u8_x3(a);
}
14738
// vld1_u16_x3: three <4 x i16> vectors via @llvm.aarch64.neon.ld1x3.v4i16
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
  return vld1_u16_x3(a);
}
14756
// vld1_u32_x3: three <2 x i32> vectors via @llvm.aarch64.neon.ld1x3.v2i32
// (auto-generated CHECK lines).
// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
  return vld1_u32_x3(a);
}
14774
14775 // CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 {
14776 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
14777 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
14778 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
14779 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14780 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14781 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
14782 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
14783 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
14784 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
14785 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
14786 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14787 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
14788 // CHECK: ret %struct.uint64x1x3_t [[TMP6]]
test_vld1_u64_x3(uint64_t const * a)14789 uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
14790 return vld1_u64_x3(a);
14791 }
14792
14793 // CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 {
14794 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
14795 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
14796 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
14797 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
14798 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
14799 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
14800 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
14801 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
14802 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
14803 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
14804 // CHECK: ret %struct.int8x8x3_t [[TMP4]]
test_vld1_s8_x3(int8_t const * a)14805 int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
14806 return vld1_s8_x3(a);
14807 }
14808
14809 // CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 {
14810 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
14811 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
14812 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
14813 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14814 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14815 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14816 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14817 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14818 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
14819 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
14820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14821 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
14822 // CHECK: ret %struct.int16x4x3_t [[TMP6]]
test_vld1_s16_x3(int16_t const * a)14823 int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
14824 return vld1_s16_x3(a);
14825 }
14826
14827 // CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 {
14828 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
14829 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
14830 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
14831 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
14832 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14833 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
14834 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
14835 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
14836 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
14837 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
14838 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14839 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
14840 // CHECK: ret %struct.int32x2x3_t [[TMP6]]
test_vld1_s32_x3(int32_t const * a)14841 int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
14842 return vld1_s32_x3(a);
14843 }
14844
14845 // CHECK-LABEL: define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 {
14846 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
14847 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
14848 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
14849 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14850 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14851 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
14852 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
14853 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
14854 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
14855 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
14856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14857 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
14858 // CHECK: ret %struct.int64x1x3_t [[TMP6]]
test_vld1_s64_x3(int64_t const * a)14859 int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
14860 return vld1_s64_x3(a);
14861 }
14862
14863 // CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 {
14864 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
14865 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
14866 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
14867 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
14868 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14869 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14870 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14871 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14872 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
14873 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
14874 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14875 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
14876 // CHECK: ret %struct.float16x4x3_t [[TMP6]]
test_vld1_f16_x3(float16_t const * a)14877 float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
14878 return vld1_f16_x3(a);
14879 }
14880
14881 // CHECK-LABEL: define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) #0 {
14882 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
14883 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
14884 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
14885 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
14886 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
14887 // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* [[TMP2]])
14888 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
14889 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
14890 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
14891 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
14892 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14893 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
14894 // CHECK: ret %struct.float32x2x3_t [[TMP6]]
test_vld1_f32_x3(float32_t const * a)14895 float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
14896 return vld1_f32_x3(a);
14897 }
14898
14899 // CHECK-LABEL: define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) #0 {
14900 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
14901 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
14902 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
14903 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
14904 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
14905 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
14906 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
14907 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
14908 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
14909 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
14910 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14911 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
14912 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
test_vld1_f64_x3(float64_t const * a)14913 float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
14914 return vld1_f64_x3(a);
14915 }
14916
14917 // CHECK-LABEL: define %struct.poly8x8x3_t @test_vld1_p8_x3(i8* %a) #0 {
14918 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
14919 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
14920 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
14921 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
14922 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
14923 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
14924 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
14925 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
14926 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
14927 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
14928 // CHECK: ret %struct.poly8x8x3_t [[TMP4]]
test_vld1_p8_x3(poly8_t const * a)14929 poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
14930 return vld1_p8_x3(a);
14931 }
14932
14933 // CHECK-LABEL: define %struct.poly16x4x3_t @test_vld1_p16_x3(i16* %a) #0 {
14934 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
14935 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
14936 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
14937 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14938 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14939 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14940 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14941 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14942 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
14943 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
14944 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14945 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
14946 // CHECK: ret %struct.poly16x4x3_t [[TMP6]]
test_vld1_p16_x3(poly16_t const * a)14947 poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
14948 return vld1_p16_x3(a);
14949 }
14950
14951 // CHECK-LABEL: define %struct.poly64x1x3_t @test_vld1_p64_x3(i64* %a) #0 {
14952 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
14953 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
14954 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
14955 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
14956 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14957 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
14958 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
14959 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
14960 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
14961 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
14962 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14963 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
14964 // CHECK: ret %struct.poly64x1x3_t [[TMP6]]
test_vld1_p64_x3(poly64_t const * a)14965 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
14966 return vld1_p64_x3(a);
14967 }
14968
14969 // CHECK-LABEL: define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) #0 {
14970 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
14971 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
14972 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
14973 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
14974 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
14975 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
14976 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
14977 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
14978 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
14979 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
14980 // CHECK: ret %struct.uint8x16x4_t [[TMP4]]
test_vld1q_u8_x4(uint8_t const * a)14981 uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
14982 return vld1q_u8_x4(a);
14983 }
14984
14985 // CHECK-LABEL: define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) #0 {
14986 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
14987 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
14988 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
14989 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
14990 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14991 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
14992 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
14993 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14994 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
14995 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
14996 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
14997 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
14998 // CHECK: ret %struct.uint16x8x4_t [[TMP6]]
test_vld1q_u16_x4(uint16_t const * a)14999 uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
15000 return vld1q_u16_x4(a);
15001 }
15002
15003 // CHECK-LABEL: define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) #0 {
15004 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
15005 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
15006 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
15007 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
15008 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15009 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]])
15010 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
15011 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
15012 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
15013 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
15014 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15015 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
15016 // CHECK: ret %struct.uint32x4x4_t [[TMP6]]
test_vld1q_u32_x4(uint32_t const * a)15017 uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
15018 return vld1q_u32_x4(a);
15019 }
15020
15021 // CHECK-LABEL: define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) #0 {
15022 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
15023 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
15024 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
15025 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
15026 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15027 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
15028 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
15029 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
15030 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
15031 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
15032 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15033 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
15034 // CHECK: ret %struct.uint64x2x4_t [[TMP6]]
test_vld1q_u64_x4(uint64_t const * a)15035 uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
15036 return vld1q_u64_x4(a);
15037 }
15038
15039 // CHECK-LABEL: define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) #0 {
15040 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
15041 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
15042 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
15043 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
15044 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
15045 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
15046 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
15047 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
15048 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
15049 // CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
15050 // CHECK: ret %struct.int8x16x4_t [[TMP4]]
test_vld1q_s8_x4(int8_t const * a)15051 int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
15052 return vld1q_s8_x4(a);
15053 }
15054
15055 // CHECK-LABEL: define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) #0 {
15056 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
15057 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
15058 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
15059 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
15060 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15061 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
15062 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
15063 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
15064 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
15065 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
15066 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15067 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
15068 // CHECK: ret %struct.int16x8x4_t [[TMP6]]
test_vld1q_s16_x4(int16_t const * a)15069 int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
15070 return vld1q_s16_x4(a);
15071 }
15072
15073 // CHECK-LABEL: define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) #0 {
15074 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
15075 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
15076 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
15077 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
15078 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15079 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]])
15080 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
15081 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
15082 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
15083 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
15084 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15085 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
15086 // CHECK: ret %struct.int32x4x4_t [[TMP6]]
test_vld1q_s32_x4(int32_t const * a)15087 int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
15088 return vld1q_s32_x4(a);
15089 }
15090
15091 // CHECK-LABEL: define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) #0 {
15092 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
15093 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
15094 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
15095 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
15096 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15097 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
15098 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
15099 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
15100 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
15101 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
15102 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15103 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
15104 // CHECK: ret %struct.int64x2x4_t [[TMP6]]
test_vld1q_s64_x4(int64_t const * a)15105 int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
15106 return vld1q_s64_x4(a);
15107 }
15108
15109 // CHECK-LABEL: define %struct.float16x8x4_t @test_vld1q_f16_x4(half* %a) #0 {
15110 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
15111 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
15112 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
15113 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
15114 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15115 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
15116 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
15117 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
15118 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
15119 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
15120 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15121 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
15122 // CHECK: ret %struct.float16x8x4_t [[TMP6]]
test_vld1q_f16_x4(float16_t const * a)15123 float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
15124 return vld1q_f16_x4(a);
15125 }
15126
15127 // CHECK-LABEL: define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) #0 {
15128 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
15129 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
15130 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
15131 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
15132 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
15133 // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* [[TMP2]])
15134 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
15135 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
15136 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
15137 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
15138 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15139 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
15140 // CHECK: ret %struct.float32x4x4_t [[TMP6]]
test_vld1q_f32_x4(float32_t const * a)15141 float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
15142 return vld1q_f32_x4(a);
15143 }
15144
15145 // CHECK-LABEL: define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) #0 {
15146 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
15147 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
15148 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
15149 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
15150 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
15151 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
15152 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
15153 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
15154 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
15155 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
15156 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15157 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
15158 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
test_vld1q_f64_x4(float64_t const * a)15159 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
15160 return vld1q_f64_x4(a);
15161 }
15162
15163 // CHECK-LABEL: define %struct.poly8x16x4_t @test_vld1q_p8_x4(i8* %a) #0 {
15164 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
15165 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
15166 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
15167 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
15168 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
15169 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
15170 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
15171 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
15172 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
15173 // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
15174 // CHECK: ret %struct.poly8x16x4_t [[TMP4]]
test_vld1q_p8_x4(poly8_t const * a)15175 poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
15176 return vld1q_p8_x4(a);
15177 }
15178
15179 // CHECK-LABEL: define %struct.poly16x8x4_t @test_vld1q_p16_x4(i16* %a) #0 {
15180 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
15181 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
15182 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
15183 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
15184 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15185 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
15186 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
15187 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
15188 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
15189 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
15190 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15191 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
15192 // CHECK: ret %struct.poly16x8x4_t [[TMP6]]
test_vld1q_p16_x4(poly16_t const * a)15193 poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
15194 return vld1q_p16_x4(a);
15195 }
15196
15197 // CHECK-LABEL: define %struct.poly64x2x4_t @test_vld1q_p64_x4(i64* %a) #0 {
15198 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
15199 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
15200 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
15201 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
15202 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15203 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
15204 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
15205 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
15206 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
15207 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
15208 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15209 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
15210 // CHECK: ret %struct.poly64x2x4_t [[TMP6]]
test_vld1q_p64_x4(poly64_t const * a)15211 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
15212 return vld1q_p64_x4(a);
15213 }
15214
15215 // CHECK-LABEL: define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) #0 {
15216 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
15217 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
15218 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
15219 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15220 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15221 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15222 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
15223 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
15224 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15225 // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
15226 // CHECK: ret %struct.uint8x8x4_t [[TMP4]]
test_vld1_u8_x4(uint8_t const * a)15227 uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
15228 return vld1_u8_x4(a);
15229 }
15230
15231 // CHECK-LABEL: define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) #0 {
15232 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
15233 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
15234 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
15235 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
15236 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15237 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15238 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15239 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15240 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
15241 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
15242 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15243 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
15244 // CHECK: ret %struct.uint16x4x4_t [[TMP6]]
test_vld1_u16_x4(uint16_t const * a)15245 uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
15246 return vld1_u16_x4(a);
15247 }
15248
15249 // CHECK-LABEL: define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) #0 {
15250 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
15251 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
15252 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
15253 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
15254 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15255 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]])
15256 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
15257 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
15258 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
15259 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
15260 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15261 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
15262 // CHECK: ret %struct.uint32x2x4_t [[TMP6]]
test_vld1_u32_x4(uint32_t const * a)15263 uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
15264 return vld1_u32_x4(a);
15265 }
15266
15267 // CHECK-LABEL: define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) #0 {
15268 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
15269 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
15270 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
15271 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
15272 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15273 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15274 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15275 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15276 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
15277 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
15278 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15279 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
15280 // CHECK: ret %struct.uint64x1x4_t [[TMP6]]
test_vld1_u64_x4(uint64_t const * a)15281 uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
15282 return vld1_u64_x4(a);
15283 }
15284
15285 // CHECK-LABEL: define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) #0 {
15286 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
15287 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
15288 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
15289 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15290 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15291 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15292 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
15293 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
15294 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15295 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
15296 // CHECK: ret %struct.int8x8x4_t [[TMP4]]
test_vld1_s8_x4(int8_t const * a)15297 int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
15298 return vld1_s8_x4(a);
15299 }
15300
15301 // CHECK-LABEL: define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) #0 {
15302 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
15303 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
15304 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
15305 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
15306 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15307 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15308 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15309 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15310 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
15311 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
15312 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15313 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
15314 // CHECK: ret %struct.int16x4x4_t [[TMP6]]
test_vld1_s16_x4(int16_t const * a)15315 int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
15316 return vld1_s16_x4(a);
15317 }
15318
15319 // CHECK-LABEL: define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) #0 {
15320 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
15321 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
15322 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
15323 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
15324 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15325 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]])
15326 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
15327 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
15328 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
15329 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
15330 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15331 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
15332 // CHECK: ret %struct.int32x2x4_t [[TMP6]]
test_vld1_s32_x4(int32_t const * a)15333 int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
15334 return vld1_s32_x4(a);
15335 }
15336
15337 // CHECK-LABEL: define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) #0 {
15338 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
15339 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
15340 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
15341 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
15342 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15343 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15344 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15345 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15346 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
15347 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
15348 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15349 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
15350 // CHECK: ret %struct.int64x1x4_t [[TMP6]]
test_vld1_s64_x4(int64_t const * a)15351 int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
15352 return vld1_s64_x4(a);
15353 }
15354
15355 // CHECK-LABEL: define %struct.float16x4x4_t @test_vld1_f16_x4(half* %a) #0 {
15356 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
15357 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
15358 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
15359 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
15360 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15361 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15362 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15363 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15364 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
15365 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
15366 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15367 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
15368 // CHECK: ret %struct.float16x4x4_t [[TMP6]]
test_vld1_f16_x4(float16_t const * a)15369 float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
15370 return vld1_f16_x4(a);
15371 }
15372
15373 // CHECK-LABEL: define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) #0 {
15374 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
15375 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
15376 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
15377 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
15378 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
15379 // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* [[TMP2]])
15380 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
15381 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
15382 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
15383 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
15384 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15385 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
15386 // CHECK: ret %struct.float32x2x4_t [[TMP6]]
test_vld1_f32_x4(float32_t const * a)15387 float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
15388 return vld1_f32_x4(a);
15389 }
15390
15391 // CHECK-LABEL: define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) #0 {
15392 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
15393 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
15394 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
15395 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
15396 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
15397 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
15398 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
15399 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
15400 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
15401 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
15402 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15403 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
15404 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
test_vld1_f64_x4(float64_t const * a)15405 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
15406 return vld1_f64_x4(a);
15407 }
15408
15409 // CHECK-LABEL: define %struct.poly8x8x4_t @test_vld1_p8_x4(i8* %a) #0 {
15410 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
15411 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
15412 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
15413 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15414 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15415 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15416 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
15417 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
15418 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15419 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
15420 // CHECK: ret %struct.poly8x8x4_t [[TMP4]]
test_vld1_p8_x4(poly8_t const * a)15421 poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
15422 return vld1_p8_x4(a);
15423 }
15424
15425 // CHECK-LABEL: define %struct.poly16x4x4_t @test_vld1_p16_x4(i16* %a) #0 {
15426 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
15427 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
15428 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
15429 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
15430 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15431 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15432 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15433 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15434 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
15435 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
15436 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15437 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
15438 // CHECK: ret %struct.poly16x4x4_t [[TMP6]]
test_vld1_p16_x4(poly16_t const * a)15439 poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
15440 return vld1_p16_x4(a);
15441 }
15442
15443 // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld1_p64_x4(i64* %a) #0 {
15444 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
15445 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
15446 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
15447 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
15448 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15449 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15450 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15451 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15452 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
15453 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
15454 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15455 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
15456 // CHECK: ret %struct.poly64x1x4_t [[TMP6]]
test_vld1_p64_x4(poly64_t const * a)15457 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
15458 return vld1_p64_x4(a);
15459 }
15460
15461 // CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
15462 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
15463 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
15464 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
15465 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
15466 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
15467 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
15468 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15469 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
15470 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
15471 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15472 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
15473 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
15474 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15475 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
15476 // CHECK: ret void
test_vst1q_u8_x2(uint8_t * a,uint8x16x2_t b)15477 void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) {
15478 vst1q_u8_x2(a, b);
15479 }
15480
15481 // CHECK-LABEL: define void @test_vst1q_u16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
15482 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
15483 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
15484 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
15485 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
15486 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
15487 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
15488 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15489 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
15490 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
15491 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
15492 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15493 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
15494 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
15495 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
15496 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15497 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
15498 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15499 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15500 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15501 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15502 // CHECK: ret void
test_vst1q_u16_x2(uint16_t * a,uint16x8x2_t b)15503 void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) {
15504 vst1q_u16_x2(a, b);
15505 }
15506
15507 // CHECK-LABEL: define void @test_vst1q_u32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
15508 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
15509 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
15510 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
15511 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
15512 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
15513 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
15514 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15515 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
15516 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
15517 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
15518 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
15519 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
15520 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
15521 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
15522 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
15523 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
15524 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
15525 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
15526 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15527 // CHECK: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
15528 // CHECK: ret void
test_vst1q_u32_x2(uint32_t * a,uint32x4x2_t b)15529 void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) {
15530 vst1q_u32_x2(a, b);
15531 }
15532
15533 // CHECK-LABEL: define void @test_vst1q_u64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
15534 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
15535 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
15536 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
15537 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
15538 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
15539 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
15540 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15541 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
15542 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
15543 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
15544 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
15545 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
15546 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
15547 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
15548 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
15549 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
15550 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
15551 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
15552 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15553 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
15554 // CHECK: ret void
test_vst1q_u64_x2(uint64_t * a,uint64x2x2_t b)15555 void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) {
15556 vst1q_u64_x2(a, b);
15557 }
15558
15559 // CHECK-LABEL: define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
15560 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
15561 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
15562 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
15563 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
15564 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
15565 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
15566 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15567 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
15568 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
15569 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15570 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
15571 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
15572 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15573 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
15574 // CHECK: ret void
test_vst1q_s8_x2(int8_t * a,int8x16x2_t b)15575 void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
15576 vst1q_s8_x2(a, b);
15577 }
15578
15579 // CHECK-LABEL: define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
15580 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
15581 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
15582 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
15583 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
15584 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
15585 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
15586 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15587 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
15588 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
15589 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
15590 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15591 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
15592 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
15593 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
15594 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15595 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
15596 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15597 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15598 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15599 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15600 // CHECK: ret void
test_vst1q_s16_x2(int16_t * a,int16x8x2_t b)15601 void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) {
15602 vst1q_s16_x2(a, b);
15603 }
15604
15605 // CHECK-LABEL: define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
15606 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
15607 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
15608 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
15609 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
15610 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
15611 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
15612 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15613 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
15614 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
15615 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
15616 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
15617 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
15618 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
15619 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
15620 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
15621 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
15622 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
15623 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
15624 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15625 // CHECK: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
15626 // CHECK: ret void
void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
  // Two-register non-interleaving store; the CHECK lines above require this
  // to lower to @llvm.aarch64.neon.st1x2.v4i32.
  vst1q_s32_x2(a, b);
}
15630
15631 // CHECK-LABEL: define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
15632 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
15633 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
15634 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
15635 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
15636 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
15637 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
15638 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15639 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
15640 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
15641 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
15642 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
15643 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
15644 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
15645 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
15646 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
15647 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
15648 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
15649 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
15650 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15651 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
15652 // CHECK: ret void
void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
  // Two-register non-interleaving store; the CHECK lines above require this
  // to lower to @llvm.aarch64.neon.st1x2.v2i64.
  vst1q_s64_x2(a, b);
}
15656
15657 // CHECK-LABEL: define void @test_vst1q_f16_x2(half* %a, [2 x <8 x half>] %b.coerce) #0 {
15658 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
15659 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
15660 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
15661 // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
15662 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
15663 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
15664 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15665 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
15666 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
15667 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
15668 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
15669 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
15670 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
15671 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
15672 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
15673 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
15674 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15675 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15676 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15677 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15678 // CHECK: ret void
void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
  // Half vectors are bitcast and stored as integer lanes: the CHECK lines
  // above require a call to @llvm.aarch64.neon.st1x2.v8i16, not a v8f16 form.
  vst1q_f16_x2(a, b);
}
15682
15683 // CHECK-LABEL: define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b.coerce) #0 {
15684 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
15685 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
15686 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
15687 // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
15688 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
15689 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
15690 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15691 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
15692 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
15693 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
15694 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
15695 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
15696 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
15697 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
15698 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
15699 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
15700 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
15701 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
15702 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
15703 // CHECK: call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], float* [[TMP9]])
15704 // CHECK: ret void
void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) {
  // Two-register non-interleaving store; the CHECK lines above require this
  // to lower to @llvm.aarch64.neon.st1x2.v4f32.
  vst1q_f32_x2(a, b);
}
15708
15709 // CHECK-LABEL: define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b.coerce) #0 {
15710 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
15711 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
15712 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
15713 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
15714 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
15715 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
15716 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15717 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
15718 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
15719 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
15720 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
15721 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
15722 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
15723 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
15724 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
15725 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
15726 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
15727 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
15728 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
15729 // CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
15730 // CHECK: ret void
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  // Two-register non-interleaving store; the CHECK lines above require this
  // to lower to @llvm.aarch64.neon.st1x2.v2f64.
  vst1q_f64_x2(a, b);
}
15734
15735 // CHECK-LABEL: define void @test_vst1q_p8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
15736 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
15737 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
15738 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
15739 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
15740 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
15741 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
15742 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15743 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
15744 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
15745 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15746 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
15747 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
15748 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15749 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
15750 // CHECK: ret void
void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) {
  // i8 element type needs no pointer/vector bitcasts (see CHECK lines above):
  // lowers directly to @llvm.aarch64.neon.st1x2.v16i8 on %a.
  vst1q_p8_x2(a, b);
}
15754
15755 // CHECK-LABEL: define void @test_vst1q_p16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
15756 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
15757 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
15758 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
15759 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
15760 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
15761 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
15762 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15763 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
15764 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
15765 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
15766 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15767 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
15768 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
15769 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
15770 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15771 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
15772 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15773 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15774 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15775 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15776 // CHECK: ret void
void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) {
  // Poly16 uses the same integer lowering as s16/u16: the CHECK lines above
  // require a call to @llvm.aarch64.neon.st1x2.v8i16.
  vst1q_p16_x2(a, b);
}
15780
15781 // CHECK-LABEL: define void @test_vst1q_p64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
15782 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
15783 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
15784 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
15785 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
15786 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
15787 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
15788 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15789 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
15790 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
15791 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
15792 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
15793 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
15794 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
15795 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
15796 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
15797 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
15798 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
15799 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
15800 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15801 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
15802 // CHECK: ret void
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  // Poly64 uses the same integer lowering as s64/u64: the CHECK lines above
  // require a call to @llvm.aarch64.neon.st1x2.v2i64.
  vst1q_p64_x2(a, b);
}
15806
15807 // CHECK-LABEL: define void @test_vst1_u8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
15808 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
15809 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
15810 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
15811 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
15812 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
15813 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
15814 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15815 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
15816 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
15817 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
15818 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
15819 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
15820 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
15821 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
15822 // CHECK: ret void
void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) {
  // 64-bit (D-register) variant; i8 elements need no bitcasts (see CHECK
  // lines above): lowers directly to @llvm.aarch64.neon.st1x2.v8i8 on %a.
  vst1_u8_x2(a, b);
}
15826
15827 // CHECK-LABEL: define void @test_vst1_u16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
15828 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
15829 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
15830 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
15831 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
15832 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
15833 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
15834 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15835 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
15836 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
15837 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
15838 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
15839 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
15840 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
15841 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
15842 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
15843 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
15844 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
15845 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
15846 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15847 // CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
15848 // CHECK: ret void
void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) {
  // 64-bit (D-register) variant; the CHECK lines above require this to lower
  // to @llvm.aarch64.neon.st1x2.v4i16.
  vst1_u16_x2(a, b);
}
15852
15853 // CHECK-LABEL: define void @test_vst1_u32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
15854 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
15855 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
15856 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
15857 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
15858 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
15859 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
15860 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15861 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
15862 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
15863 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
15864 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
15865 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
15866 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
15867 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
15868 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
15869 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
15870 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
15871 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
15872 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15873 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
15874 // CHECK: ret void
void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) {
  // 64-bit (D-register) variant; the CHECK lines above require this to lower
  // to @llvm.aarch64.neon.st1x2.v2i32.
  vst1_u32_x2(a, b);
}
15878
15879 // CHECK-LABEL: define void @test_vst1_u64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
15880 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
15881 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
15882 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
15883 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
15884 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
15885 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
15886 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15887 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
15888 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
15889 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
15890 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
15891 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
15892 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
15893 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
15894 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
15895 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
15896 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
15897 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
15898 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15899 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
15900 // CHECK: ret void
void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) {
  // Single-lane 64-bit variant; the CHECK lines above require this to lower
  // to @llvm.aarch64.neon.st1x2.v1i64.
  vst1_u64_x2(a, b);
}
15904
15905 // CHECK-LABEL: define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
15906 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
15907 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
15908 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
15909 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
15910 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
15911 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
15912 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15913 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
15914 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
15915 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
15916 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
15917 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
15918 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
15919 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
15920 // CHECK: ret void
void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) {
  // Same lowering as the unsigned u8 variant (see CHECK lines above):
  // direct call to @llvm.aarch64.neon.st1x2.v8i8 on %a, no bitcasts.
  vst1_s8_x2(a, b);
}
15924
15925 // CHECK-LABEL: define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
15926 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
15927 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
15928 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
15929 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
15930 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
15931 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
15932 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15933 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
15934 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
15935 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
15936 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
15937 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
15938 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
15939 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
15940 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
15941 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
15942 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
15943 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
15944 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15945 // CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
15946 // CHECK: ret void
void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) {
  // Same lowering as the unsigned u16 variant: the CHECK lines above require
  // a call to @llvm.aarch64.neon.st1x2.v4i16.
  vst1_s16_x2(a, b);
}
15950
15951 // CHECK-LABEL: define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
15952 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
15953 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
15954 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
15955 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
15956 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
15957 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
15958 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15959 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
15960 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
15961 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
15962 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
15963 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
15964 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
15965 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
15966 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
15967 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
15968 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
15969 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
15970 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15971 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
15972 // CHECK: ret void
void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
  // Same lowering as the unsigned u32 variant: the CHECK lines above require
  // a call to @llvm.aarch64.neon.st1x2.v2i32.
  vst1_s32_x2(a, b);
}
15976
15977 // CHECK-LABEL: define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
15978 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
15979 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
15980 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
15981 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
15982 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
15983 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
15984 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15985 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
15986 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
15987 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
15988 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
15989 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
15990 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
15991 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
15992 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
15993 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
15994 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
15995 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
15996 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15997 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
15998 // CHECK: ret void
void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
  // Same lowering as the unsigned u64 variant: the CHECK lines above require
  // a call to @llvm.aarch64.neon.st1x2.v1i64.
  vst1_s64_x2(a, b);
}
16002
// CHECK-LABEL: define void @test_vst1_f16_x2(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
// vst1_f16_x2: the <4 x half> vectors are bitcast to <4 x i16> before the
// @llvm.aarch64.neon.st1x2.v4i16 call (half is stored through i16 lanes here).
void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
  vst1_f16_x2(a, b);
}
16028
// CHECK-LABEL: define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], float* [[TMP9]])
// CHECK: ret void
// vst1_f32_x2: round-trip bitcasts through <8 x i8>, then a
// @llvm.aarch64.neon.st1x2.v2f32 call on the two <2 x float> operands.
void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) {
  vst1_f32_x2(a, b);
}
16054
// CHECK-LABEL: define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
// CHECK: ret void
// vst1_f64_x2: two <1 x double> vectors stored via
// @llvm.aarch64.neon.st1x2.v1f64 (AArch64-only, since float64x1_t has no AArch32 form).
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}
16080
// CHECK-LABEL: define void @test_vst1_p8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
// vst1_p8_x2: i8 element type needs no bitcasts or pointer conversion —
// the <8 x i8> values feed @llvm.aarch64.neon.st1x2.v8i8 directly on %a.
void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) {
  vst1_p8_x2(a, b);
}
16100
// CHECK-LABEL: define void @test_vst1_p16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
// vst1_p16_x2: same lowering as the s16/u16 variants — two <4 x i16> vectors
// into @llvm.aarch64.neon.st1x2.v4i16.
void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
  vst1_p16_x2(a, b);
}
16126
// CHECK-LABEL: define void @test_vst1_p64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
// vst1_p64_x2: identical lowering to vst1_s64_x2 — two <1 x i64> vectors
// into @llvm.aarch64.neon.st1x2.v1i64.
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}
16152
// CHECK-LABEL: define void @test_vst1q_u8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// First of the x3 group: vst1q_u8_x3 stores three <16 x i8> vectors via
// @llvm.aarch64.neon.st1x3.v16i8, no bitcasts needed for i8 elements.
void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) {
  vst1q_u8_x3(a, b);
}
16175
// CHECK-LABEL: define void @test_vst1q_u16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// vst1q_u16_x3: three <8 x i16> vectors round-trip through <16 x i8> and
// feed @llvm.aarch64.neon.st1x3.v8i16.
void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) {
  vst1q_u16_x3(a, b);
}
16206
// CHECK-LABEL: define void @test_vst1q_u32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
// vst1q_u32_x3: three <4 x i32> vectors into @llvm.aarch64.neon.st1x3.v4i32.
void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
  vst1q_u32_x3(a, b);
}
16237
// CHECK-LABEL: define void @test_vst1q_u64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
// vst1q_u64_x3: three <2 x i64> vectors into @llvm.aarch64.neon.st1x3.v2i64.
void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
  vst1q_u64_x3(a, b);
}
16268
// CHECK-LABEL: define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// vst1q_s8_x3: identical lowering to the u8 variant — signedness does not
// change the generated @llvm.aarch64.neon.st1x3.v16i8 call.
void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
  vst1q_s8_x3(a, b);
}
16291
// CHECK-LABEL: define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// vst1q_s16_x3: same lowering as the u16 variant — three <8 x i16> vectors
// into @llvm.aarch64.neon.st1x3.v8i16.
void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
  vst1q_s16_x3(a, b);
}
16322
// CHECK-LABEL: define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
// vst1q_s32_x3: same lowering as the u32 variant — three <4 x i32> vectors
// into @llvm.aarch64.neon.st1x3.v4i32.
void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
  vst1q_s32_x3(a, b);
}
16353
// CHECK-LABEL: define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
// vst1q_s64_x3: same lowering as the u64 variant — three <2 x i64> vectors
// into @llvm.aarch64.neon.st1x3.v2i64.
void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
  vst1q_s64_x3(a, b);
}
16384
// CHECK-LABEL: define void @test_vst1q_f16_x3(half* %a, [3 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// vst1q_f16_x3: the <8 x half> vectors are bitcast to <8 x i16> before
// @llvm.aarch64.neon.st1x3.v8i16 — half is stored through i16 lanes here.
void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
  vst1q_f16_x3(a, b);
}
16415
16416 // CHECK-LABEL: define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b.coerce) #0 {
16417 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
16418 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
16419 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
16420 // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
16421 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
16422 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
16423 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16424 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
16425 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
16426 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
16427 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
16428 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
16429 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
16430 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
16431 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
16432 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
16433 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
16434 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
16435 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
16436 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
16437 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
16438 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
16439 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
16440 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
16441 // CHECK: call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]])
16442 // CHECK: ret void
// Exercises vst1q_f32_x3; the CHECK lines above pin its lowering to a single
// @llvm.aarch64.neon.st1x3.v4f32 call storing three <4 x float> registers.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1q_f32_x3(float32_t * a,float32x4x3_t b)16443 void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
16444 vst1q_f32_x3(a, b);
16445 }
16446
16447 // CHECK-LABEL: define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b.coerce) #0 {
16448 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
16449 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
16450 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
16451 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
16452 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
16453 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
16454 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16455 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
16456 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
16457 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
16458 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
16459 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
16460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
16461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
16462 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
16463 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
16464 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
16465 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
16466 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
16467 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
16468 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
16469 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
16470 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
16471 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
16472 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
16473 // CHECK: ret void
// Exercises vst1q_f64_x3; the CHECK lines above pin its lowering to a single
// @llvm.aarch64.neon.st1x3.v2f64 call storing three <2 x double> registers.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1q_f64_x3(float64_t * a,float64x2x3_t b)16474 void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
16475 vst1q_f64_x3(a, b);
16476 }
16477
16478 // CHECK-LABEL: define void @test_vst1q_p8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
16479 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
16480 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
16481 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
16482 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
16483 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
16484 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
16485 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16486 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
16487 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
16488 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16489 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
16490 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
16491 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16492 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
16493 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
16494 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16495 // CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
16496 // CHECK: ret void
// Exercises vst1q_p8_x3; the CHECK lines above pin its lowering to a single
// @llvm.aarch64.neon.st1x3.v16i8 call (poly8 needs no bitcasts, stored directly).
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1q_p8_x3(poly8_t * a,poly8x16x3_t b)16497 void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) {
16498 vst1q_p8_x3(a, b);
16499 }
16500
16501 // CHECK-LABEL: define void @test_vst1q_p16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
16502 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
16503 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
16504 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
16505 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
16506 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
16507 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
16508 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16509 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
16510 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
16511 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
16512 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16513 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
16514 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
16515 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
16516 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16517 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
16518 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
16519 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
16520 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
16521 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
16522 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
16523 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
16524 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
16525 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16526 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
16527 // CHECK: ret void
// Exercises vst1q_p16_x3; the CHECK lines above pin its lowering to a single
// @llvm.aarch64.neon.st1x3.v8i16 call storing three <8 x i16> registers.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1q_p16_x3(poly16_t * a,poly16x8x3_t b)16528 void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) {
16529 vst1q_p16_x3(a, b);
16530 }
16531
16532 // CHECK-LABEL: define void @test_vst1q_p64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
16533 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
16534 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
16535 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
16536 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
16537 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
16538 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
16539 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16540 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
16541 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
16542 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
16543 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
16544 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
16545 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
16546 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
16547 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
16548 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
16549 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
16550 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
16551 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
16552 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
16553 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
16554 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
16555 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
16556 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16557 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
16558 // CHECK: ret void
// Exercises vst1q_p64_x3; the CHECK lines above pin its lowering to a single
// @llvm.aarch64.neon.st1x3.v2i64 call storing three <2 x i64> registers.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1q_p64_x3(poly64_t * a,poly64x2x3_t b)16559 void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
16560 vst1q_p64_x3(a, b);
16561 }
16562
16563 // CHECK-LABEL: define void @test_vst1_u8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16564 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
16565 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
16566 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
16567 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16568 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
16569 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
16570 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16571 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
16572 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16573 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16574 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
16575 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16576 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16577 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
16578 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16579 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16580 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16581 // CHECK: ret void
// Exercises vst1_u8_x3 (64-bit D-register variant); the CHECK lines above pin
// its lowering to a single @llvm.aarch64.neon.st1x3.v8i8 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_u8_x3(uint8_t * a,uint8x8x3_t b)16582 void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) {
16583 vst1_u8_x3(a, b);
16584 }
16585
16586 // CHECK-LABEL: define void @test_vst1_u16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16587 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
16588 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
16589 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
16590 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16591 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
16592 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
16593 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16594 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
16595 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
16596 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16597 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16598 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16599 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
16600 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16601 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16602 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16603 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
16604 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16605 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16606 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16607 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16608 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16609 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16610 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16611 // CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16612 // CHECK: ret void
// Exercises vst1_u16_x3 (64-bit D-register variant); the CHECK lines above pin
// its lowering to a single @llvm.aarch64.neon.st1x3.v4i16 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_u16_x3(uint16_t * a,uint16x4x3_t b)16613 void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) {
16614 vst1_u16_x3(a, b);
16615 }
16616
16617 // CHECK-LABEL: define void @test_vst1_u32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
16618 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
16619 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
16620 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
16621 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
16622 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
16623 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
16624 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16625 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
16626 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
16627 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
16628 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16629 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
16630 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
16631 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
16632 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16633 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
16634 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
16635 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
16636 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
16637 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
16638 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
16639 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
16640 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
16641 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16642 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
16643 // CHECK: ret void
// Exercises vst1_u32_x3 (64-bit D-register variant); the CHECK lines above pin
// its lowering to a single @llvm.aarch64.neon.st1x3.v2i32 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_u32_x3(uint32_t * a,uint32x2x3_t b)16644 void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) {
16645 vst1_u32_x3(a, b);
16646 }
16647
16648 // CHECK-LABEL: define void @test_vst1_u64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16649 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
16650 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
16651 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
16652 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16653 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
16654 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
16655 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16656 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
16657 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
16658 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16659 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16660 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16661 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
16662 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16663 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16664 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16665 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
16666 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16667 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16668 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16669 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16670 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16671 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16672 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16673 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16674 // CHECK: ret void
// Exercises vst1_u64_x3 (64-bit D-register variant); the CHECK lines above pin
// its lowering to a single @llvm.aarch64.neon.st1x3.v1i64 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_u64_x3(uint64_t * a,uint64x1x3_t b)16675 void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) {
16676 vst1_u64_x3(a, b);
16677 }
16678
16679 // CHECK-LABEL: define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16680 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
16681 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
16682 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
16683 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16684 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
16685 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
16686 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16687 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
16688 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16689 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
16691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16692 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16693 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
16694 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16695 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16696 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16697 // CHECK: ret void
// Exercises vst1_s8_x3; same lowering as the u8 variant — the CHECK lines above
// pin a single @llvm.aarch64.neon.st1x3.v8i8 call (signedness is irrelevant to st1).
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_s8_x3(int8_t * a,int8x8x3_t b)16698 void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) {
16699 vst1_s8_x3(a, b);
16700 }
16701
16702 // CHECK-LABEL: define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16703 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
16704 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
16705 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
16706 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16707 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
16708 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
16709 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16710 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
16711 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
16712 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16713 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16714 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16715 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
16716 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16717 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16718 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16719 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
16720 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16721 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16722 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16723 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16724 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16725 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16726 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16727 // CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16728 // CHECK: ret void
// Exercises vst1_s16_x3; same lowering as the u16 variant — the CHECK lines above
// pin a single @llvm.aarch64.neon.st1x3.v4i16 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_s16_x3(int16_t * a,int16x4x3_t b)16729 void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) {
16730 vst1_s16_x3(a, b);
16731 }
16732
16733 // CHECK-LABEL: define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
16734 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
16735 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
16736 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
16737 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
16738 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
16739 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
16740 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16741 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
16742 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16743 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
16744 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16745 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
16746 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16747 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
16748 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16749 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
16750 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16751 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
16752 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
16753 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
16754 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
16755 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
16756 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
16757 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16758 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
16759 // CHECK: ret void
// Exercises vst1_s32_x3; same lowering as the u32 variant — the CHECK lines above
// pin a single @llvm.aarch64.neon.st1x3.v2i32 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_s32_x3(int32_t * a,int32x2x3_t b)16760 void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
16761 vst1_s32_x3(a, b);
16762 }
16763
16764 // CHECK-LABEL: define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16765 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
16766 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
16767 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
16768 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16769 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
16770 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
16771 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16772 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
16773 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16774 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16775 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16776 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16777 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16778 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16779 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16780 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16781 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16782 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16783 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16784 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16785 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16786 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16787 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16788 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16789 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16790 // CHECK: ret void
// Exercises vst1_s64_x3; same lowering as the u64 variant — the CHECK lines above
// pin a single @llvm.aarch64.neon.st1x3.v1i64 call.
// Do not alter the body: the emitted IR is matched exactly by FileCheck.
test_vst1_s64_x3(int64_t * a,int64x1x3_t b)16791 void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
16792 vst1_s64_x3(a, b);
16793 }
16794
// vst1_f16_x3: the 3-vector struct argument is coerced to [3 x <4 x half>],
// spilled to a stack temporary, memcpy'd, and each <4 x half> lane reloaded.
// The store is emitted as the v4i16 form of @llvm.aarch64.neon.st1x3 — the
// f16 data is passed through via its i16 bit pattern (bitcast round-trip).
16795 // CHECK-LABEL: define void @test_vst1_f16_x3(half* %a, [3 x <4 x half>] %b.coerce) #0 {
16796 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
16797 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
16798 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
16799 // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
16800 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
16801 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
16802 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16803 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
16804 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16805 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
16806 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16807 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
16808 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16809 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
16810 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16811 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
16812 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16813 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
16814 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
16815 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
16816 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16817 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16818 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16819 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16820 // CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16821 // CHECK: ret void
test_vst1_f16_x3(float16_t * a,float16x4x3_t b)16822 void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
16823 vst1_f16_x3(a, b);
16824 }
16825
// vst1_f32_x3: three <2 x float> lanes are reloaded from the coerced struct
// and stored with @llvm.aarch64.neon.st1x3.v2f32 (float kept as float, unlike
// the f16 case above; the <8 x i8> bitcasts round-trip back to <2 x float>).
16826 // CHECK-LABEL: define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b.coerce) #0 {
16827 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
16828 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
16829 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
16830 // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
16831 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
16832 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
16833 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16834 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
16835 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16836 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
16837 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16838 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
16839 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16840 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
16841 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16842 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
16843 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16844 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
16845 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
16846 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
16847 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
16848 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
16849 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
16850 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
16851 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]])
16852 // CHECK: ret void
test_vst1_f32_x3(float32_t * a,float32x2x3_t b)16853 void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) {
16854 vst1_f32_x3(a, b);
16855 }
16856
// vst1_f64_x3: same lowering shape as the f32 case, but with <1 x double>
// lanes and the v1f64 variant of @llvm.aarch64.neon.st1x3.
16857 // CHECK-LABEL: define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b.coerce) #0 {
16858 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
16859 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
16860 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
16861 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
16862 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
16863 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
16864 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16865 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
16866 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16867 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
16868 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
16869 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
16870 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16871 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
16872 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
16873 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
16874 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16875 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
16876 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
16877 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
16878 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
16879 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
16880 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
16881 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
16882 // CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
16883 // CHECK: ret void
test_vst1_f64_x3(float64_t * a,float64x1x3_t b)16884 void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
16885 vst1_f64_x3(a, b);
16886 }
16887
// vst1_p8_x3: with i8 element type no per-lane bitcasts are needed — the
// three <8 x i8> values and the i8* pointer %a feed
// @llvm.aarch64.neon.st1x3.v8i8 directly, so this CHECK block is shorter
// than its wider-element siblings.
16888 // CHECK-LABEL: define void @test_vst1_p8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16889 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
16890 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
16891 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
16892 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16893 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
16894 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
16895 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16896 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16897 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16898 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16899 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16900 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16901 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16902 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16903 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16904 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16905 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16906 // CHECK: ret void
test_vst1_p8_x3(poly8_t * a,poly8x8x3_t b)16907 void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
16908 vst1_p8_x3(a, b);
16909 }
16910
// vst1_p16_x3: poly16 lanes lower exactly like the other 16-bit element
// types — three <4 x i16> values stored via @llvm.aarch64.neon.st1x3.v4i16.
16911 // CHECK-LABEL: define void @test_vst1_p16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16912 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
16913 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
16914 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
16915 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16916 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
16917 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
16918 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16919 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
16920 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16921 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16922 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16923 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16924 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16925 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16926 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16927 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16928 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16929 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16930 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16931 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16932 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16933 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16934 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16935 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16936 // CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16937 // CHECK: ret void
test_vst1_p16_x3(poly16_t * a,poly16x4x3_t b)16938 void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
16939 vst1_p16_x3(a, b);
16940 }
16941
// vst1_p64_x3: poly64 lanes share the i64 lowering — three <1 x i64> values
// stored via @llvm.aarch64.neon.st1x3.v1i64.
16942 // CHECK-LABEL: define void @test_vst1_p64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16943 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
16944 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
16945 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
16946 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16947 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
16948 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
16949 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16950 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
16951 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16952 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16953 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16954 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16955 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16956 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16957 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16958 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16959 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16960 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16961 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16962 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16963 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16964 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16965 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16966 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16967 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16968 // CHECK: ret void
test_vst1_p64_x3(poly64_t * a,poly64x1x3_t b)16969 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
16970 vst1_p64_x3(a, b);
16971 }
16972
// vst1q_u8_x4: first of the quad-register x4 variants (64-byte struct,
// 16-byte alignment). i8 elements need no bitcasts; four <16 x i8> lanes
// feed @llvm.aarch64.neon.st1x4.v16i8 with the raw i8* pointer %a.
16973 // CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
16974 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
16975 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
16976 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
16977 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
16978 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
16979 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
16980 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
16981 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
16982 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
16983 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16984 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
16985 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
16986 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16987 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
16988 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
16989 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16990 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
16991 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
16992 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
16993 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
16994 // CHECK: ret void
test_vst1q_u8_x4(uint8_t * a,uint8x16x4_t b)16995 void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
16996 vst1q_u8_x4(a, b);
16997 }
16998
// vst1q_u16_x4: four <8 x i16> lanes are reloaded through <16 x i8> bitcast
// round-trips and stored with @llvm.aarch64.neon.st1x4.v8i16.
16999 // CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
17000 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
17001 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
17002 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
17003 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
17004 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
17005 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
17006 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17007 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
17008 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
17009 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
17010 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17011 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
17012 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
17013 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
17014 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17015 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
17016 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
17017 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
17018 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17019 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
17020 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
17021 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
17022 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
17023 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
17024 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
17025 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
17026 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
17027 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
17028 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17029 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
17030 // CHECK: ret void
test_vst1q_u16_x4(uint16_t * a,uint16x8x4_t b)17031 void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
17032 vst1q_u16_x4(a, b);
17033 }
17034
// vst1q_u32_x4: same pattern with <4 x i32> lanes and
// @llvm.aarch64.neon.st1x4.v4i32.
17035 // CHECK-LABEL: define void @test_vst1q_u32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
17036 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
17037 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
17038 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
17039 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
17040 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
17041 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
17042 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17043 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
17044 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
17045 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
17046 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17047 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
17048 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
17049 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
17050 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17051 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
17052 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
17053 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
17054 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17055 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
17056 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
17057 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
17058 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
17059 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
17060 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
17061 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
17062 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
17063 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
17064 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
17065 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
17066 // CHECK: ret void
test_vst1q_u32_x4(uint32_t * a,uint32x4x4_t b)17067 void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
17068 vst1q_u32_x4(a, b);
17069 }
17070
// vst1q_u64_x4: same pattern with <2 x i64> lanes and
// @llvm.aarch64.neon.st1x4.v2i64.
17071 // CHECK-LABEL: define void @test_vst1q_u64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
17072 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
17073 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
17074 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
17075 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
17076 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
17077 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
17078 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17079 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
17080 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
17081 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
17082 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
17083 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
17084 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
17085 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
17086 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
17087 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
17088 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
17089 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
17090 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
17091 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
17092 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
17093 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
17094 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
17095 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
17096 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
17097 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
17098 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
17099 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
17100 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17101 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
17102 // CHECK: ret void
test_vst1q_u64_x4(uint64_t * a,uint64x2x4_t b)17103 void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
17104 vst1q_u64_x4(a, b);
17105 }
17106
// vst1q_s8_x4: signed i8 lanes lower identically to the unsigned case above
// (same @llvm.aarch64.neon.st1x4.v16i8 call, differing only in struct name).
17107 // CHECK-LABEL: define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
17108 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
17109 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
17110 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
17111 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
17112 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
17113 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
17114 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17115 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
17116 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
17117 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17118 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
17119 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
17120 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17121 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
17122 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
17123 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17124 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
17125 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
17126 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
17127 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
17128 // CHECK: ret void
test_vst1q_s8_x4(int8_t * a,int8x16x4_t b)17129 void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
17130 vst1q_s8_x4(a, b);
17131 }
17132
// vst1q_s16_x4: signed i16 lanes share the unsigned lowering —
// @llvm.aarch64.neon.st1x4.v8i16 with <16 x i8> bitcast round-trips.
17133 // CHECK-LABEL: define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
17134 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
17135 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
17136 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
17137 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
17138 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
17139 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
17140 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17141 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
17142 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
17143 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
17144 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17145 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
17146 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
17147 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
17148 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17149 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
17150 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
17151 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
17152 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17153 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
17154 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
17155 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
17156 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
17157 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
17158 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
17159 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
17160 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
17161 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
17162 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17163 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
17164 // CHECK: ret void
test_vst1q_s16_x4(int16_t * a,int16x8x4_t b)17165 void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
17166 vst1q_s16_x4(a, b);
17167 }
17168
17169 // CHECK-LABEL: define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
17170 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
17171 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
17172 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
17173 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
17174 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
17175 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
17176 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17177 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
17178 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
17179 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
17180 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17181 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
17182 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
17183 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
17184 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17185 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
17186 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
17187 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
17188 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17189 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
17190 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
17191 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
17192 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
17193 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
17194 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
17195 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
17196 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
17197 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
17198 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
17199 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
17200 // CHECK: ret void
// CodeGen test: vst1q_s32_x4 must lower to @llvm.aarch64.neon.st1x4.v4i32
// storing all four <4 x i32> lanes of b (see CHECK lines above).
void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
  vst1q_s32_x4(a, b);
}
17204
17205 // CHECK-LABEL: define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
17206 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
17207 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
17208 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
17209 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
17210 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
17211 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
17212 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17213 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
17214 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
17215 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
17216 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
17217 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
17218 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
17219 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
17220 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
17221 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
17222 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
17223 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
17224 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
17225 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
17226 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
17227 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
17228 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
17229 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
17230 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
17231 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
17232 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
17233 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
17234 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17235 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
17236 // CHECK: ret void
// CodeGen test: vst1q_s64_x4 must lower to @llvm.aarch64.neon.st1x4.v2i64
// storing all four <2 x i64> lanes of b (see CHECK lines above).
void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
  vst1q_s64_x4(a, b);
}
17240
17241 // CHECK-LABEL: define void @test_vst1q_f16_x4(half* %a, [4 x <8 x half>] %b.coerce) #0 {
17242 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
17243 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
17244 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
17245 // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
17246 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
17247 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
17248 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17249 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
17250 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
17251 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
17252 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
17253 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
17254 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
17255 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
17256 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
17257 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
17258 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
17259 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
17260 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
17261 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
17262 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
17263 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
17264 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
17265 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
17266 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
17267 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
17268 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
17269 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
17270 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17271 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
17272 // CHECK: ret void
// CodeGen test: vst1q_f16_x4 must lower to @llvm.aarch64.neon.st1x4.v8i16
// (half vectors are bitcast to <8 x i16> for the intrinsic; see CHECK lines above).
void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
  vst1q_f16_x4(a, b);
}
17276
17277 // CHECK-LABEL: define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b.coerce) #0 {
17278 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
17279 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
17280 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
17281 // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
17282 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
17283 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
17284 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17285 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
17286 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17287 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
17288 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
17289 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
17290 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17291 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
17292 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
17293 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
17294 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17295 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
17296 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
17297 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
17298 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17299 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
17300 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
17301 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
17302 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
17303 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
17304 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
17305 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
17306 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
17307 // CHECK: call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]])
17308 // CHECK: ret void
// CodeGen test: vst1q_f32_x4 must lower to @llvm.aarch64.neon.st1x4.v4f32
// storing all four <4 x float> lanes of b (see CHECK lines above).
void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) {
  vst1q_f32_x4(a, b);
}
17312
17313 // CHECK-LABEL: define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b.coerce) #0 {
17314 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
17315 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
17316 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
17317 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
17318 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
17319 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
17320 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17321 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
17322 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17323 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
17324 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
17325 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
17326 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17327 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
17328 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
17329 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
17330 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17331 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
17332 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
17333 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
17334 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17335 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
17336 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
17337 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
17338 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
17339 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
17340 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
17341 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
17342 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
17343 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
17344 // CHECK: ret void
// CodeGen test: vst1q_f64_x4 must lower to @llvm.aarch64.neon.st1x4.v2f64
// storing all four <2 x double> lanes of b (see CHECK lines above).
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}
17348
17349 // CHECK-LABEL: define void @test_vst1q_p8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
17350 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
17351 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
17352 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
17353 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
17354 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
17355 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
17356 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17357 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17358 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
17359 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17360 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17361 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
17362 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17363 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17364 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
17365 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17366 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17367 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
17368 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
17369 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
17370 // CHECK: ret void
// CodeGen test: vst1q_p8_x4 must lower to @llvm.aarch64.neon.st1x4.v16i8;
// byte elements need no pointer/vector bitcasts (see CHECK lines above).
void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) {
  vst1q_p8_x4(a, b);
}
17374
17375 // CHECK-LABEL: define void @test_vst1q_p16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
17376 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
17377 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
17378 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
17379 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
17380 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
17381 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
17382 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17383 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
17384 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
17385 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
17386 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17387 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
17388 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
17389 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
17390 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17391 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
17392 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
17393 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
17394 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17395 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
17396 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
17397 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
17398 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
17399 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
17400 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
17401 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
17402 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
17403 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
17404 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17405 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
17406 // CHECK: ret void
// CodeGen test: vst1q_p16_x4 must lower to @llvm.aarch64.neon.st1x4.v8i16
// storing all four <8 x i16> lanes of b (see CHECK lines above).
void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) {
  vst1q_p16_x4(a, b);
}
17410
17411 // CHECK-LABEL: define void @test_vst1q_p64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
17412 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
17413 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
17414 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
17415 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
17416 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
17417 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
17418 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17419 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
17420 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
17421 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
17422 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
17423 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
17424 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
17425 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
17426 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
17427 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
17428 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
17429 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
17430 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
17431 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
17432 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
17433 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
17434 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
17435 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
17436 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
17437 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
17438 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
17439 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
17440 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17441 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
17442 // CHECK: ret void
// CodeGen test: vst1q_p64_x4 must lower to @llvm.aarch64.neon.st1x4.v2i64
// storing all four <2 x i64> lanes of b (see CHECK lines above).
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}
17446
17447 // CHECK-LABEL: define void @test_vst1_u8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
17448 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
17449 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
17450 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
17451 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
17452 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
17453 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
17454 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17455 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
17456 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
17457 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17458 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
17459 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
17460 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17461 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
17462 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
17463 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17464 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
17465 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
17466 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
17467 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
17468 // CHECK: ret void
// CodeGen test: vst1_u8_x4 (64-bit D-register form) must lower to
// @llvm.aarch64.neon.st1x4.v8i8 with no bitcasts (see CHECK lines above).
void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) {
  vst1_u8_x4(a, b);
}
17472
17473 // CHECK-LABEL: define void @test_vst1_u16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
17474 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
17475 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
17476 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
17477 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
17478 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
17479 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
17480 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17481 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
17482 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
17483 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
17484 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17485 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
17486 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
17487 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
17488 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17489 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
17490 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
17491 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
17492 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17493 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
17494 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
17495 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
17496 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
17497 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
17498 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
17499 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
17500 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
17501 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
17502 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17503 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
17504 // CHECK: ret void
// CodeGen test: vst1_u16_x4 (64-bit D-register form) must lower to
// @llvm.aarch64.neon.st1x4.v4i16 (see CHECK lines above).
void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) {
  vst1_u16_x4(a, b);
}
17508
17509 // CHECK-LABEL: define void @test_vst1_u32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
17510 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
17511 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
17512 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
17513 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
17514 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
17515 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
17516 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17517 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
17518 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
17519 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
17520 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17521 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
17522 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
17523 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
17524 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17525 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
17526 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
17527 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
17528 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17529 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
17530 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
17531 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
17532 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
17533 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
17534 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
17535 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
17536 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
17537 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
17538 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
17539 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
17540 // CHECK: ret void
// CodeGen test: vst1_u32_x4 (64-bit D-register form) must lower to
// @llvm.aarch64.neon.st1x4.v2i32 (see CHECK lines above).
void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) {
  vst1_u32_x4(a, b);
}
17544
17545 // CHECK-LABEL: define void @test_vst1_u64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
17546 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
17547 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
17548 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
17549 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
17550 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
17551 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
17552 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17553 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
17554 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
17555 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
17556 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17557 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
17558 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
17559 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
17560 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17561 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
17562 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
17563 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
17564 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17565 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
17566 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
17567 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
17568 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
17569 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
17570 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
17571 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
17572 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
17573 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
17574 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17575 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
17576 // CHECK: ret void
// Store four <1 x i64> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v1i64)
// is pinned by the CHECK lines above.
void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) {
  vst1_u64_x4(a, b);
}
17580
17581 // CHECK-LABEL: define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
17582 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
17583 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
17584 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
17585 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
17586 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
17587 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
17588 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17589 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
17590 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
17591 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17592 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
17593 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
17594 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17595 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
17596 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
17597 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17598 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
17599 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
17600 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
17601 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
17602 // CHECK: ret void
// Store four <8 x i8> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v8i8)
// is pinned by the CHECK lines above.
void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) {
  vst1_s8_x4(a, b);
}
17606
17607 // CHECK-LABEL: define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
17608 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
17609 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
17610 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
17611 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
17612 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
17613 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
17614 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17615 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
17616 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
17617 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
17618 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17619 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
17620 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
17621 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
17622 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17623 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
17624 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
17625 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
17626 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17627 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
17628 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
17629 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
17630 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
17631 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
17632 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
17633 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
17634 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
17635 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
17636 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17637 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
17638 // CHECK: ret void
// Store four <4 x i16> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v4i16)
// is pinned by the CHECK lines above.
void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) {
  vst1_s16_x4(a, b);
}
17642
17643 // CHECK-LABEL: define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
17644 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
17645 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
17646 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
17647 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
17648 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
17649 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
17650 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17651 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
17652 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
17653 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
17654 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17655 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
17656 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
17657 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
17658 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17659 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
17660 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
17661 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
17662 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17663 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
17664 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
17665 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
17666 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
17667 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
17668 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
17669 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
17670 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
17671 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
17672 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
17673 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
17674 // CHECK: ret void
// Store four <2 x i32> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v2i32)
// is pinned by the CHECK lines above.
void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
  vst1_s32_x4(a, b);
}
17678
17679 // CHECK-LABEL: define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
17680 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
17681 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
17682 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
17683 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
17684 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
17685 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
17686 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17687 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
17688 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
17689 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
17690 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17691 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
17692 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
17693 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
17694 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17695 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
17696 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
17697 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
17698 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17699 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
17700 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
17701 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
17702 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
17703 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
17704 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
17705 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
17706 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
17707 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
17708 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17709 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
17710 // CHECK: ret void
// Store four <1 x i64> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v1i64)
// is pinned by the CHECK lines above.
void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
  vst1_s64_x4(a, b);
}
17714
17715 // CHECK-LABEL: define void @test_vst1_f16_x4(half* %a, [4 x <4 x half>] %b.coerce) #0 {
17716 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
17717 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
17718 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
17719 // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
17720 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
17721 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
17722 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17723 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
17724 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
17725 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
17726 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
17727 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
17728 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
17729 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
17730 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
17731 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
17732 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
17733 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
17734 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
17735 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
17736 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
17737 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
17738 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
17739 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
17740 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
17741 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
17742 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
17743 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
17744 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17745 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
17746 // CHECK: ret void
// Store four <4 x half> vectors through `a`; per the CHECK lines above, the halves are
// bitcast to <4 x i16> and stored via llvm.aarch64.neon.st1x4.v4i16.
void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
  vst1_f16_x4(a, b);
}
17750
17751 // CHECK-LABEL: define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b.coerce) #0 {
17752 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
17753 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
17754 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
17755 // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
17756 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
17757 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
17758 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17759 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
17760 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17761 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
17762 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
17763 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
17764 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17765 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
17766 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
17767 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
17768 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17769 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
17770 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
17771 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
17772 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17773 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
17774 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
17775 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
17776 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
17777 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
17778 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
17779 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
17780 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
17781 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]])
17782 // CHECK: ret void
// Store four <2 x float> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v2f32)
// is pinned by the CHECK lines above.
void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) {
  vst1_f32_x4(a, b);
}
17786
17787 // CHECK-LABEL: define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b.coerce) #0 {
17788 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
17789 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
17790 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
17791 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
17792 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
17793 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
17794 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17795 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
17796 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17797 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
17798 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
17799 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
17800 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17801 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
17802 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
17803 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
17804 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17805 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
17806 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
17807 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
17808 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17809 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
17810 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
17811 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
17812 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
17813 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
17814 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
17815 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
17816 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
17817 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
17818 // CHECK: ret void
// Store four <1 x double> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v1f64)
// is pinned by the CHECK lines above.
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}
17822
17823 // CHECK-LABEL: define void @test_vst1_p8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
17824 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
17825 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
17826 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
17827 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
17828 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
17829 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
17830 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17831 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17832 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
17833 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17834 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17835 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
17836 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17837 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17838 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
17839 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17840 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17841 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
17842 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
17843 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
17844 // CHECK: ret void
// Store four poly8 <8 x i8> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v8i8)
// is pinned by the CHECK lines above.
void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) {
  vst1_p8_x4(a, b);
}
17848
17849 // CHECK-LABEL: define void @test_vst1_p16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
17850 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
17851 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
17852 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
17853 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
17854 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
17855 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
17856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17857 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
17858 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
17860 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17861 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
17862 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17863 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
17864 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17865 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
17866 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17867 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
17868 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17869 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
17870 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17871 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
17872 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
17873 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
17874 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
17875 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
17876 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
17877 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
17878 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17879 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
17880 // CHECK: ret void
// Store four poly16 <4 x i16> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v4i16)
// is pinned by the CHECK lines above.
void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) {
  vst1_p16_x4(a, b);
}
17884
17885 // CHECK-LABEL: define void @test_vst1_p64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
17886 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
17887 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
17888 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
17889 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
17890 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
17891 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
17892 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17893 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
17894 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17895 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
17896 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17897 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
17898 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17899 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
17900 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17901 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
17902 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17903 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
17904 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17905 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
17906 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17907 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
17908 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
17909 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
17910 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
17911 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
17912 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
17913 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
17914 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17915 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
17916 // CHECK: ret void
// Store four poly64 <1 x i64> vectors through `a`; expected IR (llvm.aarch64.neon.st1x4.v1i64)
// is pinned by the CHECK lines above.
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
17920
17921 // CHECK-LABEL: define i64 @test_vceqd_s64(i64 %a, i64 %b) #0 {
17922 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
17923 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17924 // CHECK: ret i64 [[VCEQD_I]]
// Scalar compare: per the CHECK lines above this lowers to `icmp eq` + sext,
// i.e. all-ones when a == b, zero otherwise.
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}
17928
17929 // CHECK-LABEL: define i64 @test_vceqd_u64(i64 %a, i64 %b) #0 {
17930 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
17931 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17932 // CHECK: ret i64 [[VCEQD_I]]
// Scalar compare: per the CHECK lines above this lowers to `icmp eq` + sext,
// i.e. all-ones when a == b, zero otherwise.
// Fix: cast to uint64_t (was int64_t) to match the declared return type and the
// unsigned intrinsic; the generated IR is identical (both are i64), so the
// CHECK lines above still match.
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vceqd_u64(a, b);
}
17936
17937 // CHECK-LABEL: define i64 @test_vceqzd_s64(i64 %a) #0 {
17938 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
17939 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
17940 // CHECK: ret i64 [[VCEQZ_I]]
// Scalar compare against zero: per the CHECK lines above this lowers to
// `icmp eq i64 %a, 0` + sext — all-ones when a == 0, zero otherwise.
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}
17944
17945 // CHECK-LABEL: define i64 @test_vceqzd_u64(i64 %a) #0 {
17946 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
17947 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
17948 // CHECK: ret i64 [[VCEQZD_I]]
// Scalar compare against zero: per the CHECK lines above this lowers to
// `icmp eq i64 %a, 0` + sext — all-ones when a == 0, zero otherwise.
// Fix: use uint64_t for the parameter, return type, and cast (was int64_t) so
// the _u64 test matches the unsigned intrinsic and its sibling unsigned tests
// (e.g. test_vceqd_u64). The IR signature is unchanged (i64 either way), so
// the CHECK-LABEL above still matches.
uint64_t test_vceqzd_u64(uint64_t a) {
  return (uint64_t)vceqzd_u64(a);
}
17952
17953 // CHECK-LABEL: define i64 @test_vcged_s64(i64 %a, i64 %b) #0 {
17954 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
17955 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17956 // CHECK: ret i64 [[VCEQD_I]]
// Scalar signed >=: per the CHECK lines above this lowers to `icmp sge` + sext,
// i.e. all-ones when a >= b, zero otherwise.
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}
17960
17961 // CHECK-LABEL: define i64 @test_vcged_u64(i64 %a, i64 %b) #0 {
17962 // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
17963 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17964 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned >=: per the CHECK lines above this lowers to `icmp uge` + sext,
// i.e. all-ones when a >= b, zero otherwise.
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}
17968
17969 // CHECK-LABEL: define i64 @test_vcgezd_s64(i64 %a) #0 {
17970 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
17971 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
17972 // CHECK: ret i64 [[VCGEZ_I]]
// Scalar signed >= 0: per the CHECK lines above this lowers to
// `icmp sge i64 %a, 0` + sext — all-ones when a >= 0, zero otherwise.
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}
17976
17977 // CHECK-LABEL: define i64 @test_vcgtd_s64(i64 %a, i64 %b) #0 {
17978 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
17979 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17980 // CHECK: ret i64 [[VCEQD_I]]
// Scalar signed >: per the CHECK lines above this lowers to `icmp sgt` + sext,
// i.e. all-ones when a > b, zero otherwise.
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}
17984
17985 // CHECK-LABEL: define i64 @test_vcgtd_u64(i64 %a, i64 %b) #0 {
17986 // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
17987 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17988 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned >: per the CHECK lines above this lowers to `icmp ugt` + sext,
// i.e. all-ones when a > b, zero otherwise.
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}
17992
17993 // CHECK-LABEL: define i64 @test_vcgtzd_s64(i64 %a) #0 {
17994 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
17995 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
17996 // CHECK: ret i64 [[VCGTZ_I]]
test_vcgtzd_s64(int64_t a)17997 int64_t test_vcgtzd_s64(int64_t a) {
17998 return (int64_t)vcgtzd_s64(a);
17999 }
18000
// Scalar 64-bit less-or-equal compares: signed (sle), unsigned (ule), and
// signed compare-against-zero, each lowered to icmp + sext to i64.
// CHECK-LABEL: define i64 @test_vcled_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vclezd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}
18024
// Scalar 64-bit less-than compares: signed (slt), unsigned (ult), and signed
// compare-against-zero, each lowered to icmp + sext to i64.
// CHECK-LABEL: define i64 @test_vcltd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}
18048
// Scalar 64-bit bit-test (vtst): lowered to and + icmp ne 0 + sext to i64;
// the signed and unsigned variants emit identical IR.
// CHECK-LABEL: define i64 @test_vtstd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = and i64 %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK: ret i64 [[VTSTD_I]]
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vtstd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = and i64 %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK: ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
18066
// Scalar absolute value / saturating absolute value / negate tests.
// 64-bit abs and the 32/64-bit sqabs map to scalar intrinsics directly;
// the 8/16-bit sqabs variants are widened through a one-lane vector
// (insertelement, vector intrinsic, extractelement). vnegd is a plain sub.
// CHECK-LABEL: define i64 @test_vabsd_s64(i64 %a) #0 {
// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4
// CHECK: ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqabsb_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}

// CHECK-LABEL: define i16 @test_vqabsh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}

// CHECK-LABEL: define i32 @test_vqabss_s32(i32 %a) #0 {
// CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4
// CHECK: ret i32 [[VQABSS_S32_I]]
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}

// CHECK-LABEL: define i64 @test_vqabsd_s64(i64 %a) #0 {
// CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4
// CHECK: ret i64 [[VQABSD_S64_I]]
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}

// CHECK-LABEL: define i64 @test_vnegd_s64(i64 %a) #0 {
// CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
// CHECK: ret i64 [[VNEGD_I]]
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}
18112
// Scalar saturating negate tests. As with sqabs, the 8/16-bit forms are
// widened through a one-lane vector, while 32/64-bit use scalar intrinsics.
// CHECK-LABEL: define i8 @test_vqnegb_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}

// CHECK-LABEL: define i16 @test_vqnegh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}

// CHECK-LABEL: define i32 @test_vqnegs_s32(i32 %a) #0 {
// CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4
// CHECK: ret i32 [[VQNEGS_S32_I]]
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}

// CHECK-LABEL: define i64 @test_vqnegd_s64(i64 %a) #0 {
// CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4
// CHECK: ret i64 [[VQNEGD_S64_I]]
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}
18144
// Signed saturating accumulate of unsigned value (suqadd). 8/16-bit forms
// go through a one-lane vector; 32/64-bit use the scalar intrinsics.
// CHECK-LABEL: define i8 @test_vuqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vuqaddb_s8(int8_t a, int8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vuqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vuqaddh_s16(int16_t a, int16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vuqadds_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VUQADDS_S32_I]]
int32_t test_vuqadds_s32(int32_t a, int32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vuqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VUQADDD_S64_I]]
int64_t test_vuqaddd_s64(int64_t a, int64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}
18178
// Unsigned saturating accumulate of signed value (usqadd) — the mirror of
// the suqadd tests above, with the same one-lane-vector widening for the
// 8/16-bit forms and scalar intrinsics for 32/64-bit.
// CHECK-LABEL: define i8 @test_vsqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vsqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vsqadds_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vsqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}
18212
// Scalar saturating doubling multiply-accumulate / -subtract tests. The
// 16-bit forms widen through one-lane vectors and an sqdmull vector
// intrinsic, then accumulate via scalar sqadd/sqsub; the 32-bit forms use
// the dedicated sqdmulls.scalar intrinsic directly.
// CHECK-LABEL: define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4
// CHECK: ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {

  return (int32_t)vqdmlalh_s16(a, b, c);
}

// CHECK-LABEL: define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) #0 {
// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
// CHECK: ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}

// CHECK-LABEL: define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4
// CHECK: ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {

  return (int32_t)vqdmlslh_s16(a, b, c);
}

// CHECK-LABEL: define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) #0 {
// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
// CHECK: ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}
18252
// Scalar saturating doubling multiply-long: the 16-bit form widens through
// one-lane vectors and the sqdmull vector intrinsic; the 32-bit form uses
// the dedicated sqdmulls.scalar intrinsic.
// CHECK-LABEL: define i32 @test_vqdmullh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
// CHECK: ret i32 [[TMP2]]
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}

// CHECK-LABEL: define i64 @test_vqdmulls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4
// CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}
18269
// Scalar signed-to-unsigned saturating narrow (sqxtun). Half/word sources
// widen through one-lane vectors; the 64->32 form uses a scalar intrinsic.
// CHECK-LABEL: define i8 @test_vqmovunh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}

// CHECK-LABEL: define i16 @test_vqmovuns_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}

// CHECK-LABEL: define i32 @test_vqmovund_s64(i64 %a) #0 {
// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVUND_S64_I]]
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}
18294
// Scalar saturating narrow: signed (sqxtn) and unsigned (uqxtn) variants.
// Half/word sources widen through one-lane vectors; the 64->32 forms use
// the scalar sqxtn/uqxtn intrinsics.
// CHECK-LABEL: define i8 @test_vqmovnh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_s16(int16_t a) {
  return (int8_t)vqmovnh_s16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_s32(int32_t a) {
  return (int16_t)vqmovns_s32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_s64(i64 %a) #0 {
// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVND_S64_I]]
int32_t test_vqmovnd_s64(int64_t a) {
  return (int32_t)vqmovnd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqmovnh_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_u64(i64 %a) #0 {
// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVND_U64_I]]
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}
18344
// Scalar floating-point equality compares (and compare-against-zero):
// lowered to an ordered fcmp oeq whose i1 result is sign-extended to the
// integer result width (all-ones / zero mask semantics).
// CHECK-LABEL: define i32 @test_vceqs_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vceqd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vceqzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}
18376
// Scalar floating-point >= compares (and >= zero): ordered fcmp oge + sext.
// CHECK-LABEL: define i32 @test_vcges_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgezs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgezd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}
18408
// Scalar floating-point > compares (and > zero): ordered fcmp ogt + sext.
// CHECK-LABEL: define i32 @test_vcgts_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgtzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgtzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}
18440
// Scalar floating-point <= compares (and <= zero): ordered fcmp ole + sext.
// CHECK-LABEL: define i32 @test_vcles_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vclezs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vclezd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}
18472
// Scalar floating-point < compares (and < zero): ordered fcmp olt + sext.
// CHECK-LABEL: define i32 @test_vclts_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcltzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcltzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}
18504
// Scalar floating-point absolute compares (>= and >): lowered to the
// facge / facgt intrinsics with operands in source order.
// CHECK-LABEL: define i32 @test_vcages_f32(float %a, float %b) #0 {
// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4
// CHECK: ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaged_f64(double %a, double %b) #0 {
// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4
// CHECK: ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcagts_f32(float %a, float %b) #0 {
// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4
// CHECK: ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcagtd_f64(double %a, double %b) #0 {
// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4
// CHECK: ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}
18532
// Scalar floating-point absolute compares (<= and <): implemented with the
// same facge / facgt intrinsics but with the operands swapped (%b, %a).
// CHECK-LABEL: define i32 @test_vcales_f32(float %a, float %b) #0 {
// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4
// CHECK: ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaled_f64(double %a, double %b) #0 {
// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4
// CHECK: ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcalts_f32(float %a, float %b) #0 {
// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4
// CHECK: ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaltd_f64(double %a, double %b) #0 {
// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4
// CHECK: ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}
18560
// Scalar and one-lane-vector shift-right by immediate. Signed uses ashr,
// unsigned uses lshr. An unsigned scalar shift by 64 constant-folds to 0
// (checked both with a parameter and with a known constant input), since
// the full-width shift produces an all-zero result by the intrinsic's
// semantics rather than C-level UB.
// CHECK-LABEL: define i64 @test_vshrd_n_s64(i64 %a) #0 {
// CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK: ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64(i64 %a) #0 {
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {

  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64_2() #0 {
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64_2() {

  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}
18600
// Scalar and one-lane-vector rounding shift-right by immediate: lowered to
// the srshl/urshl (rounding shift-left) intrinsics with the shift amount
// negated (shift right by n == rounding shift left by -n).
// CHECK-LABEL: define i64 @test_vrshrd_n_s64(i64 %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vrshrd_n_u64(i64 %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}
18632
// Scalar signed shift-right-accumulate: ashr b by 63, then add to a.
// CHECK-LABEL: define i64 @test_vsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}
18640
18641 // CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
18642 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18643 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
18644 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18645 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
18646 // CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
18647 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
18648 // CHECK: ret <1 x i64> [[TMP4]]
test_vsra_n_s64(int64x1_t a,int64x1_t b)18649 int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
18650 return vsra_n_s64(a, b, 1);
18651 }
18652
// Scalar unsigned shift-right-accumulate: lshr b by 63, then add to a.
// CHECK-LABEL: define i64 @test_vsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}
18660
// Unsigned shift by the full width (64): the addend folds to zero, so %a is returned unchanged.
// CHECK-LABEL: define i64 @test_vsrad_n_u64_2(i64 %a, i64 %b) #0 {
// CHECK: ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}
18667
18668 // CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
18669 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18670 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
18671 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18672 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
18673 // CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
18674 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
18675 // CHECK: ret <1 x i64> [[TMP4]]
test_vsra_n_u64(uint64x1_t a,uint64x1_t b)18676 uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
18677 return vsra_n_u64(a, b, 1);
18678 }
18679
// Scalar signed rounding shift-right-accumulate: srshl b by -63, then add to a.
// CHECK-LABEL: define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}
18687
18688 // CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
18689 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18690 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
18691 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
18692 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
18693 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18694 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
18695 // CHECK: ret <1 x i64> [[TMP3]]
test_vrsra_n_s64(int64x1_t a,int64x1_t b)18696 int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
18697 return vrsra_n_s64(a, b, 1);
18698 }
18699
// Scalar unsigned rounding shift-right-accumulate: urshl b by -63, then add to a.
// CHECK-LABEL: define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}
18707
18708 // CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
18709 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18710 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
18711 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
18712 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
18713 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18714 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
18715 // CHECK: ret <1 x i64> [[TMP3]]
test_vrsra_n_u64(uint64x1_t a,uint64x1_t b)18716 uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
18717 return vrsra_n_u64(a, b, 1);
18718 }
18719
// Scalar signed shift left by 1 lowers to a plain shl.
// CHECK-LABEL: define i64 @test_vshld_n_s64(i64 %a) #0 {
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK: ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}
18726 // CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
18727 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18728 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18729 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
18730 // CHECK: ret <1 x i64> [[VSHL_N]]
test_vshl_n_s64(int64x1_t a)18731 int64x1_t test_vshl_n_s64(int64x1_t a) {
18732 return vshl_n_s64(a, 1);
18733 }
18734
// Scalar unsigned shift left by 63 lowers to a plain shl.
// CHECK-LABEL: define i64 @test_vshld_n_u64(i64 %a) #0 {
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK: ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}
18741
18742 // CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
18743 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18744 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18745 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
18746 // CHECK: ret <1 x i64> [[VSHL_N]]
test_vshl_n_u64(uint64x1_t a)18747 uint64x1_t test_vshl_n_u64(uint64x1_t a) {
18748 return vshl_n_u64(a, 1);
18749 }
18750
// Scalar saturating shift left: the i8 is widened into lane 0 of a v8i8 sqshl.
// CHECK-LABEL: define i8 @test_vqshlb_n_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}
18759
// Scalar saturating shift left: the i16 is widened into lane 0 of a v4i16 sqshl.
// CHECK-LABEL: define i16 @test_vqshlh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}
18768
// Scalar saturating shift left on i32 uses the scalar sqshl intrinsic directly.
// CHECK-LABEL: define i32 @test_vqshls_n_s32(i32 %a) #0 {
// CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}
18775
// Scalar saturating shift left on i64 uses the scalar sqshl intrinsic directly.
// CHECK-LABEL: define i64 @test_vqshld_n_s64(i64 %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}
18782
18783 // CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
18784 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
18785 // CHECK: ret <8 x i8> [[VQSHL_N]]
test_vqshl_n_s8(int8x8_t a)18786 int8x8_t test_vqshl_n_s8(int8x8_t a) {
18787 return vqshl_n_s8(a, 0);
18788 }
18789
18790 // CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
18791 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
18792 // CHECK: ret <16 x i8> [[VQSHL_N]]
test_vqshlq_n_s8(int8x16_t a)18793 int8x16_t test_vqshlq_n_s8(int8x16_t a) {
18794 return vqshlq_n_s8(a, 0);
18795 }
18796
18797 // CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
18798 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
18799 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
18800 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
18801 // CHECK: ret <4 x i16> [[VQSHL_N1]]
test_vqshl_n_s16(int16x4_t a)18802 int16x4_t test_vqshl_n_s16(int16x4_t a) {
18803 return vqshl_n_s16(a, 0);
18804 }
18805
18806 // CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
18807 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
18808 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
18809 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
18810 // CHECK: ret <8 x i16> [[VQSHL_N1]]
test_vqshlq_n_s16(int16x8_t a)18811 int16x8_t test_vqshlq_n_s16(int16x8_t a) {
18812 return vqshlq_n_s16(a, 0);
18813 }
18814
18815 // CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
18816 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18817 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
18818 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
18819 // CHECK: ret <2 x i32> [[VQSHL_N1]]
test_vqshl_n_s32(int32x2_t a)18820 int32x2_t test_vqshl_n_s32(int32x2_t a) {
18821 return vqshl_n_s32(a, 0);
18822 }
18823
18824 // CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
18825 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
18826 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
18827 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
18828 // CHECK: ret <4 x i32> [[VQSHL_N1]]
test_vqshlq_n_s32(int32x4_t a)18829 int32x4_t test_vqshlq_n_s32(int32x4_t a) {
18830 return vqshlq_n_s32(a, 0);
18831 }
18832
18833 // CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
18834 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
18835 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
18836 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
18837 // CHECK: ret <2 x i64> [[VQSHL_N1]]
test_vqshlq_n_s64(int64x2_t a)18838 int64x2_t test_vqshlq_n_s64(int64x2_t a) {
18839 return vqshlq_n_s64(a, 0);
18840 }
18841
18842 // CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
18843 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
18844 // CHECK: ret <8 x i8> [[VQSHL_N]]
test_vqshl_n_u8(uint8x8_t a)18845 uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
18846 return vqshl_n_u8(a, 0);
18847 }
18848
18849 // CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
18850 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
18851 // CHECK: ret <16 x i8> [[VQSHL_N]]
test_vqshlq_n_u8(uint8x16_t a)18852 uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
18853 return vqshlq_n_u8(a, 0);
18854 }
18855
18856 // CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
18857 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
18858 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
18859 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
18860 // CHECK: ret <4 x i16> [[VQSHL_N1]]
test_vqshl_n_u16(uint16x4_t a)18861 uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
18862 return vqshl_n_u16(a, 0);
18863 }
18864
18865 // CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
18866 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
18867 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
18868 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
18869 // CHECK: ret <8 x i16> [[VQSHL_N1]]
test_vqshlq_n_u16(uint16x8_t a)18870 uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
18871 return vqshlq_n_u16(a, 0);
18872 }
18873
18874 // CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
18875 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18876 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
18877 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
18878 // CHECK: ret <2 x i32> [[VQSHL_N1]]
test_vqshl_n_u32(uint32x2_t a)18879 uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
18880 return vqshl_n_u32(a, 0);
18881 }
18882
18883 // CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
18884 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
18885 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
18886 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
18887 // CHECK: ret <4 x i32> [[VQSHL_N1]]
test_vqshlq_n_u32(uint32x4_t a)18888 uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
18889 return vqshlq_n_u32(a, 0);
18890 }
18891
18892 // CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
18893 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
18894 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
18895 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
18896 // CHECK: ret <2 x i64> [[VQSHL_N1]]
test_vqshlq_n_u64(uint64x2_t a)18897 uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
18898 return vqshlq_n_u64(a, 0);
18899 }
18900
18901 // CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
18902 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18903 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18904 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
18905 // CHECK: ret <1 x i64> [[VQSHL_N1]]
test_vqshl_n_s64(int64x1_t a)18906 int64x1_t test_vqshl_n_s64(int64x1_t a) {
18907 return vqshl_n_s64(a, 1);
18908 }
18909
// Scalar unsigned saturating shift left: i8 widened into lane 0 of a v8i8 uqshl.
// CHECK-LABEL: define i8 @test_vqshlb_n_u8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}
18918
// Scalar unsigned saturating shift left: i16 widened into lane 0 of a v4i16 uqshl.
// CHECK-LABEL: define i16 @test_vqshlh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}
18927
// Scalar unsigned saturating shift left on i32 uses the scalar uqshl intrinsic.
// CHECK-LABEL: define i32 @test_vqshls_n_u32(i32 %a) #0 {
// CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}
18934
// Scalar unsigned saturating shift left on i64 uses the scalar uqshl intrinsic.
// CHECK-LABEL: define i64 @test_vqshld_n_u64(i64 %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}
18941
18942 // CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
18943 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18944 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18945 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
18946 // CHECK: ret <1 x i64> [[VQSHL_N1]]
test_vqshl_n_u64(uint64x1_t a)18947 uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
18948 return vqshl_n_u64(a, 1);
18949 }
18950
// Scalar signed-to-unsigned saturating shift left (sqshlu) via v8i8 lane 0.
// CHECK-LABEL: define i8 @test_vqshlub_n_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}
18959
// Scalar signed-to-unsigned saturating shift left (sqshlu) via v4i16 lane 0.
// CHECK-LABEL: define i16 @test_vqshluh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}
18968
// Scalar signed-to-unsigned saturating shift left on i32 (scalar sqshlu).
// CHECK-LABEL: define i32 @test_vqshlus_n_s32(i32 %a) #0 {
// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}
18975
// Scalar signed-to-unsigned saturating shift left on i64 (scalar sqshlu).
// CHECK-LABEL: define i64 @test_vqshlud_n_s64(i64 %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}
18982
18983 // CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
18984 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18985 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18986 // CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
18987 // CHECK: ret <1 x i64> [[VQSHLU_N1]]
test_vqshlu_n_s64(int64x1_t a)18988 uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
18989 return vqshlu_n_s64(a, 1);
18990 }
18991
// Scalar shift-right-insert: both i64 operands are bitcast to <1 x i64> for vsri.
// CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK: ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}
19001
19002 // CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
19003 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
19004 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
19005 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
19006 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
19007 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
19008 // CHECK: ret <1 x i64> [[VSRI_N2]]
test_vsri_n_s64(int64x1_t a,int64x1_t b)19009 int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
19010 return vsri_n_s64(a, b, 1);
19011 }
19012
// Scalar unsigned shift-right-insert; same vsri lowering as the signed variant.
// CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK: ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}
19022
19023 // CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
19024 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
19025 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
19026 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
19027 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
19028 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
19029 // CHECK: ret <1 x i64> [[VSRI_N2]]
test_vsri_n_u64(uint64x1_t a,uint64x1_t b)19030 uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
19031 return vsri_n_u64(a, b, 1);
19032 }
19033
// Scalar shift-left-insert: both i64 operands are bitcast to <1 x i64> for vsli.
// CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK: ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}
19043
19044 // CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
19045 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
19046 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
19047 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
19048 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
19049 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
19050 // CHECK: ret <1 x i64> [[VSLI_N2]]
test_vsli_n_s64(int64x1_t a,int64x1_t b)19051 int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
19052 return vsli_n_s64(a, b, 1);
19053 }
19054
// Scalar unsigned shift-left-insert; same vsli lowering as the signed variant.
// CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK: ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}
19064
19065 // CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
19066 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
19067 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
19068 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
19069 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
19070 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
19071 // CHECK: ret <1 x i64> [[VSLI_N2]]
test_vsli_n_u64(uint64x1_t a,uint64x1_t b)19072 uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
19073 return vsli_n_u64(a, b, 1);
19074 }
19075
// Scalar saturating shift-right-narrow: i16 -> i8 via v8i16 lane 0 and sqshrn.
// CHECK-LABEL: define i8 @test_vqshrnh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}
19084
// Scalar saturating shift-right-narrow: i32 -> i16 via v4i32 lane 0 and sqshrn.
// CHECK-LABEL: define i16 @test_vqshrns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}
19093
// Scalar saturating shift-right-narrow: i64 -> i32 uses the scalar sqshrn intrinsic.
// CHECK-LABEL: define i32 @test_vqshrnd_n_s64(i64 %a) #0 {
// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}
19100
// Scalar unsigned saturating shift-right-narrow: i16 -> i8 via uqshrn.
// CHECK-LABEL: define i8 @test_vqshrnh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}
19109
// Scalar unsigned saturating shift-right-narrow: i32 -> i16 via uqshrn.
// CHECK-LABEL: define i16 @test_vqshrns_n_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}
19118
// Scalar unsigned saturating shift-right-narrow: i64 -> i32 via scalar uqshrn.
// CHECK-LABEL: define i32 @test_vqshrnd_n_u64(i64 %a) #0 {
// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}
19125
// Scalar saturating rounding shift-right-narrow: i16 -> i8 via sqrshrn.
// CHECK-LABEL: define i8 @test_vqrshrnh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}
19134
// Scalar saturating rounding shift-right-narrow: i32 -> i16 via sqrshrn.
// CHECK-LABEL: define i16 @test_vqrshrns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}
19143
// Scalar saturating rounding shift-right-narrow: i64 -> i32 via scalar sqrshrn.
// CHECK-LABEL: define i32 @test_vqrshrnd_n_s64(i64 %a) #0 {
// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}
19150
// Scalar unsigned saturating rounding shift-right-narrow: i16 -> i8 via uqrshrn.
// CHECK-LABEL: define i8 @test_vqrshrnh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}
19159
// Scalar unsigned saturating rounding shift-right-narrow: i32 -> i16 via uqrshrn.
// CHECK-LABEL: define i16 @test_vqrshrns_n_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}
19168
// Scalar unsigned saturating rounding shift-right-narrow: i64 -> i32 via scalar uqrshrn.
// CHECK-LABEL: define i32 @test_vqrshrnd_n_u64(i64 %a) #0 {
// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}
19175
// Scalar saturating shift-right-narrow to unsigned: i16 -> i8 via sqshrun.
// CHECK-LABEL: define i8 @test_vqshrunh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}
19184
// Scalar saturating shift-right-narrow to unsigned: i32 -> i16 via sqshrun.
// CHECK-LABEL: define i16 @test_vqshruns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}
19193
// Scalar saturating shift-right-narrow to unsigned: i64 -> i32 via scalar sqshrun.
// CHECK-LABEL: define i32 @test_vqshrund_n_s64(i64 %a) #0 {
// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}
19200
// Scalar saturating rounding shift-right-narrow to unsigned: i16 -> i8 via sqrshrun.
// CHECK-LABEL: define i8 @test_vqrshrunh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}
19209
// Scalar saturating rounding shift-right-narrow to unsigned: i32 -> i16 via sqrshrun.
// CHECK-LABEL: define i16 @test_vqrshruns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}
19218
19219 // CHECK-LABEL: define i32 @test_vqrshrund_n_s64(i64 %a) #0 {
19220 // CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
19221 // CHECK: ret i32 [[VQRSHRUND_N_S64]]
test_vqrshrund_n_s64(int64_t a)19222 int32_t test_vqrshrund_n_s64(int64_t a) {
19223 return (int32_t)vqrshrund_n_s64(a, 32);
19224 }
19225
// Scalar fixed-point <-> floating-point conversion tests (vcvt{s,d}_n_*).
// These check the signed (vcvtfxs2fp / vcvtfp2fxs) and unsigned
// (vcvtfxu2fp / vcvtfp2fxu) intrinsic lowerings; the immediate fraction-bit
// counts exercise the minimum (1) and maximum (element width) legal values.
// CHECK-LABEL: define float @test_vcvts_n_f32_s32(i32 %a) #0 {
// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK: ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_s64(i64 %a) #0 {
// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK: ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: define float @test_vcvts_n_f32_u32(i32 %a) #0 {
// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK: ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_u64(i64 %a) #0 {
// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK: ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: define i32 @test_vcvts_n_s32_f32(float %a) #0 {
// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK: ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_s64_f64(double %a) #0 {
// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK: ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: define i32 @test_vcvts_n_u32_f32(float %a) #0 {
// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK: ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_u64_f64(double %a) #0 {
// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK: ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
19281
// vreinterpret to int8x8_t from every other 64-bit vector type. All of these
// are same-size reinterpretations, so they lower to a plain bitcast; when the
// source already has the <8 x i8> IR representation (u8, p8) no instruction
// is emitted at all and the argument is returned directly.
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}
19370
// vreinterpret to int16x4_t from every other 64-bit vector type: plain
// bitcasts, with same-representation sources (u16, p16) producing no IR.
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}
19459
// vreinterpret to int32x2_t from every other 64-bit vector type: plain
// bitcasts, with the same-representation source (u32) producing no IR.
// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}
19549
// vreinterpret to int64x1_t from every other 64-bit vector type: plain
// bitcasts, with same-representation sources (u64, p64) producing no IR.
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}
19638
// vreinterpret to uint8x8_t from every other 64-bit vector type: plain
// bitcasts, with same-representation sources (s8, p8) producing no IR.
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}
19727
// vreinterpret to uint16x4_t from every other 64-bit vector type: plain
// bitcasts, with same-representation sources (s16, p16) producing no IR.
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}
19816
// vreinterpret to uint32x2_t from every other 64-bit vector type: plain
// bitcasts, with the same-representation source (s32) producing no IR.
// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
19906
// NOTE(review): CodeGen tests for the 64-bit vreinterpret_u64_* intrinsics.
// Distinct source/destination IR types must produce exactly one bitcast to
// <1 x i64>; when source and destination are both <1 x i64> (s64/p64) the
// reinterpret is a no-op and the argument is returned directly.
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}

// NOTE(review): CodeGen tests for the 64-bit vreinterpret_f16_* intrinsics.
// Every source type must lower to a single bitcast to <4 x half>; no
// conversion instructions (fptrunc/fpext) may appear -- this is a pure
// reinterpretation of the bits, not a value conversion.
// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}

// NOTE(review): CodeGen tests for the 64-bit vreinterpret_f32_* intrinsics.
// Every source type must lower to a single bitcast to <2 x float>; no
// fp conversion instructions may appear (bit reinterpretation only).
// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}

// NOTE(review): CodeGen tests for the 64-bit vreinterpret_f64_* intrinsics
// (AArch64-only: float64x1_t does not exist in 32-bit NEON). Every source
// type must lower to a single bitcast to <1 x double>.
// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}

// NOTE(review): CodeGen tests for the 64-bit vreinterpret_p8_* intrinsics.
// poly8x8_t and the 8-bit integer vectors share the <8 x i8> IR type, so
// reinterprets from s8/u8 are no-ops (argument returned directly); all other
// sources lower to a single bitcast to <8 x i8>.
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}

// NOTE(review): CodeGen tests for the 64-bit vreinterpret_p16_* intrinsics.
// poly16x4_t shares the <4 x i16> IR type with s16/u16, so those two
// reinterprets are no-ops; every other source lowers to a single bitcast
// to <4 x i16>.
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// NOTE(review): CodeGen tests for the 64-bit vreinterpret_p64_* intrinsics
// (poly64 requires the crypto/poly64 extension type). poly64x1_t shares the
// <1 x i64> IR type with s64/u64, so those reinterprets are no-ops; every
// other source lowers to a single bitcast to <1 x i64>.
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}

// NOTE(review): CodeGen tests for the 128-bit ("q") vreinterpretq_s8_*
// intrinsics. int8x16_t and uint8x16_t share the <16 x i8> IR type, so the
// u8 reinterpret is a no-op; all other sources lower to a single bitcast
// to <16 x i8>.
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

20591 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
20592 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
20593 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f32(float32x4_t a)20594 int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
20595 return vreinterpretq_s8_f32(a);
20596 }
20597
20598 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f64(<2 x double> %a) #0 {
20599 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
20600 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f64(float64x2_t a)20601 int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
20602 return vreinterpretq_s8_f64(a);
20603 }
20604
20605 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
20606 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_p8(poly8x16_t a)20607 int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
20608 return vreinterpretq_s8_p8(a);
20609 }
20610
20611 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
20612 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20613 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p16(poly16x8_t a)20614 int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
20615 return vreinterpretq_s8_p16(a);
20616 }
20617
20618 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p64(<2 x i64> %a) #0 {
20619 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
20620 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p64(poly64x2_t a)20621 int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
20622 return vreinterpretq_s8_p64(a);
20623 }
20624
// vreinterpretq_s16_* — reinterpret each 128-bit vector type as int16x8_t.
// Same-element-layout sources (u16, p16) produce a direct value return; all
// others lower to a single bitcast to <8 x i16>.
// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}
20713
// vreinterpretq_s32_* — reinterpret each 128-bit vector type as int32x4_t.
// Only u32 shares the <4 x i32> IR layout (direct return); all other sources
// lower to a single bitcast.
// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}
20803
// vreinterpretq_s64_* — reinterpret each 128-bit vector type as int64x2_t.
// u64 and p64 already use the <2 x i64> IR layout (direct return); everything
// else lowers to a single bitcast.
// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}
20892
// vreinterpretq_u8_* — reinterpret each 128-bit vector type as uint8x16_t.
// s8 and p8 already have the <16 x i8> IR layout (direct return); the rest
// lower to a single bitcast.
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}
20981
// vreinterpretq_u16_* — reinterpret each 128-bit vector type as uint16x8_t.
// s16 and p16 share the <8 x i16> IR layout (direct return); all other
// sources lower to a single bitcast.
// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}
21070
// vreinterpretq_u32_* — reinterpret each 128-bit vector type as uint32x4_t.
// Only s32 shares the <4 x i32> IR layout (direct return); all other sources
// lower to a single bitcast.
// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}
21160
// vreinterpretq_u64_* — reinterpret each 128-bit vector type as uint64x2_t.
// s64 and p64 already use the <2 x i64> IR layout (direct return); everything
// else lowers to a single bitcast.
// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}
21249
21250 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
21251 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
21252 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s8(int8x16_t a)21253 float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
21254 return vreinterpretq_f16_s8(a);
21255 }
21256
21257 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
21258 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
21259 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s16(int16x8_t a)21260 float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
21261 return vreinterpretq_f16_s16(a);
21262 }
21263
21264 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
21265 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
21266 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s32(int32x4_t a)21267 float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
21268 return vreinterpretq_f16_s32(a);
21269 }
21270
21271 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
21272 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
21273 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s64(int64x2_t a)21274 float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
21275 return vreinterpretq_f16_s64(a);
21276 }
21277
21278 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
21279 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
21280 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u8(uint8x16_t a)21281 float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
21282 return vreinterpretq_f16_u8(a);
21283 }
21284
21285 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
21286 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
21287 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u16(uint16x8_t a)21288 float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
21289 return vreinterpretq_f16_u16(a);
21290 }
21291
21292 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
21293 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
21294 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u32(uint32x4_t a)21295 float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
21296 return vreinterpretq_f16_u32(a);
21297 }
21298
21299 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
21300 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
21301 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u64(uint64x2_t a)21302 float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
21303 return vreinterpretq_f16_u64(a);
21304 }
21305
21306 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
21307 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
21308 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_f32(float32x4_t a)21309 float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
21310 return vreinterpretq_f16_f32(a);
21311 }
21312
21313 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f64(<2 x double> %a) #0 {
21314 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
21315 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_f64(float64x2_t a)21316 float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
21317 return vreinterpretq_f16_f64(a);
21318 }
21319
21320 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
21321 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
21322 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p8(poly8x16_t a)21323 float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
21324 return vreinterpretq_f16_p8(a);
21325 }
21326
21327 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
21328 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
21329 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p16(poly16x8_t a)21330 float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
21331 return vreinterpretq_f16_p16(a);
21332 }
21333
21334 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p64(<2 x i64> %a) #0 {
21335 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
21336 // CHECK: ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p64(poly64x2_t a)21337 float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
21338 return vreinterpretq_f16_p64(a);
21339 }
21340
21341 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
21342 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
21343 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s8(int8x16_t a)21344 float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
21345 return vreinterpretq_f32_s8(a);
21346 }
21347
21348 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
21349 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
21350 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s16(int16x8_t a)21351 float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
21352 return vreinterpretq_f32_s16(a);
21353 }
21354
21355 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
21356 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
21357 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s32(int32x4_t a)21358 float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
21359 return vreinterpretq_f32_s32(a);
21360 }
21361
21362 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
21363 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
21364 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s64(int64x2_t a)21365 float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
21366 return vreinterpretq_f32_s64(a);
21367 }
21368
21369 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
21370 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
21371 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u8(uint8x16_t a)21372 float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
21373 return vreinterpretq_f32_u8(a);
21374 }
21375
21376 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
21377 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
21378 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u16(uint16x8_t a)21379 float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
21380 return vreinterpretq_f32_u16(a);
21381 }
21382
21383 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
21384 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
21385 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u32(uint32x4_t a)21386 float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
21387 return vreinterpretq_f32_u32(a);
21388 }
21389
21390 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
21391 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
21392 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u64(uint64x2_t a)21393 float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
21394 return vreinterpretq_f32_u64(a);
21395 }
21396
21397 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
21398 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
21399 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f16(float16x8_t a)21400 float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
21401 return vreinterpretq_f32_f16(a);
21402 }
21403
21404 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f64(<2 x double> %a) #0 {
21405 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
21406 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f64(float64x2_t a)21407 float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
21408 return vreinterpretq_f32_f64(a);
21409 }
21410
21411 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
21412 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
21413 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p8(poly8x16_t a)21414 float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
21415 return vreinterpretq_f32_p8(a);
21416 }
21417
21418 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
21419 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
21420 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p16(poly16x8_t a)21421 float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
21422 return vreinterpretq_f32_p16(a);
21423 }
21424
21425 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p64(<2 x i64> %a) #0 {
21426 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
21427 // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p64(poly64x2_t a)21428 float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
21429 return vreinterpretq_f32_p64(a);
21430 }
21431
21432 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s8(<16 x i8> %a) #0 {
21433 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
21434 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s8(int8x16_t a)21435 float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
21436 return vreinterpretq_f64_s8(a);
21437 }
21438
21439 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s16(<8 x i16> %a) #0 {
21440 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
21441 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s16(int16x8_t a)21442 float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
21443 return vreinterpretq_f64_s16(a);
21444 }
21445
21446 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s32(<4 x i32> %a) #0 {
21447 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
21448 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s32(int32x4_t a)21449 float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
21450 return vreinterpretq_f64_s32(a);
21451 }
21452
21453 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s64(<2 x i64> %a) #0 {
21454 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
21455 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s64(int64x2_t a)21456 float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
21457 return vreinterpretq_f64_s64(a);
21458 }
21459
21460 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u8(<16 x i8> %a) #0 {
21461 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
21462 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u8(uint8x16_t a)21463 float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
21464 return vreinterpretq_f64_u8(a);
21465 }
21466
21467 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u16(<8 x i16> %a) #0 {
21468 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
21469 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u16(uint16x8_t a)21470 float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
21471 return vreinterpretq_f64_u16(a);
21472 }
21473
21474 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u32(<4 x i32> %a) #0 {
21475 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
21476 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u32(uint32x4_t a)21477 float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
21478 return vreinterpretq_f64_u32(a);
21479 }
21480
21481 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u64(<2 x i64> %a) #0 {
21482 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
21483 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u64(uint64x2_t a)21484 float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
21485 return vreinterpretq_f64_u64(a);
21486 }
21487
21488 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f16(<8 x half> %a) #0 {
21489 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
21490 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_f16(float16x8_t a)21491 float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
21492 return vreinterpretq_f64_f16(a);
21493 }
21494
21495 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f32(<4 x float> %a) #0 {
21496 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
21497 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_f32(float32x4_t a)21498 float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
21499 return vreinterpretq_f64_f32(a);
21500 }
21501
21502 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p8(<16 x i8> %a) #0 {
21503 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
21504 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p8(poly8x16_t a)21505 float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
21506 return vreinterpretq_f64_p8(a);
21507 }
21508
21509 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p16(<8 x i16> %a) #0 {
21510 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
21511 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p16(poly16x8_t a)21512 float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
21513 return vreinterpretq_f64_p16(a);
21514 }
21515
21516 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p64(<2 x i64> %a) #0 {
21517 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
21518 // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p64(poly64x2_t a)21519 float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
21520 return vreinterpretq_f64_p64(a);
21521 }
21522
21523 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
21524 // CHECK: ret <16 x i8> %a
test_vreinterpretq_p8_s8(int8x16_t a)21525 poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
21526 return vreinterpretq_p8_s8(a);
21527 }
21528
21529 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
21530 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21531 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_s16(int16x8_t a)21532 poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
21533 return vreinterpretq_p8_s16(a);
21534 }
21535
21536 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
21537 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21538 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_s32(int32x4_t a)21539 poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
21540 return vreinterpretq_p8_s32(a);
21541 }
21542
21543 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
21544 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
21545 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_s64(int64x2_t a)21546 poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
21547 return vreinterpretq_p8_s64(a);
21548 }
21549
21550 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
21551 // CHECK: ret <16 x i8> %a
test_vreinterpretq_p8_u8(uint8x16_t a)21552 poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
21553 return vreinterpretq_p8_u8(a);
21554 }
21555
21556 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
21557 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21558 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_u16(uint16x8_t a)21559 poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
21560 return vreinterpretq_p8_u16(a);
21561 }
21562
21563 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
21564 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21565 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_u32(uint32x4_t a)21566 poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
21567 return vreinterpretq_p8_u32(a);
21568 }
21569
21570 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
21571 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
21572 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_u64(uint64x2_t a)21573 poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
21574 return vreinterpretq_p8_u64(a);
21575 }
21576
21577 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
21578 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
21579 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_f16(float16x8_t a)21580 poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
21581 return vreinterpretq_p8_f16(a);
21582 }
21583
21584 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
21585 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
21586 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_f32(float32x4_t a)21587 poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
21588 return vreinterpretq_p8_f32(a);
21589 }
21590
21591 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f64(<2 x double> %a) #0 {
21592 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21593 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_f64(float64x2_t a)21594 poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
21595 return vreinterpretq_p8_f64(a);
21596 }
21597
21598 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
21599 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21600 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_p16(poly16x8_t a)21601 poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
21602 return vreinterpretq_p8_p16(a);
21603 }
21604
21605 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p64(<2 x i64> %a) #0 {
21606 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
21607 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_p64(poly64x2_t a)21608 poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
21609 return vreinterpretq_p8_p64(a);
21610 }
21611
21612 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
21613 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
21614 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s8(int8x16_t a)21615 poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
21616 return vreinterpretq_p16_s8(a);
21617 }
21618
21619 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
21620 // CHECK: ret <8 x i16> %a
test_vreinterpretq_p16_s16(int16x8_t a)21621 poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
21622 return vreinterpretq_p16_s16(a);
21623 }
21624
21625 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
21626 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
21627 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s32(int32x4_t a)21628 poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
21629 return vreinterpretq_p16_s32(a);
21630 }
21631
21632 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
21633 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
21634 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s64(int64x2_t a)21635 poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
21636 return vreinterpretq_p16_s64(a);
21637 }
21638
21639 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
21640 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
21641 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u8(uint8x16_t a)21642 poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
21643 return vreinterpretq_p16_u8(a);
21644 }
21645
21646 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
21647 // CHECK: ret <8 x i16> %a
test_vreinterpretq_p16_u16(uint16x8_t a)21648 poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
21649 return vreinterpretq_p16_u16(a);
21650 }
21651
21652 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
21653 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
21654 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u32(uint32x4_t a)21655 poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
21656 return vreinterpretq_p16_u32(a);
21657 }
21658
21659 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
21660 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
21661 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u64(uint64x2_t a)21662 poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
21663 return vreinterpretq_p16_u64(a);
21664 }
21665
21666 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
21667 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
21668 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f16(float16x8_t a)21669 poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
21670 return vreinterpretq_p16_f16(a);
21671 }
21672
21673 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
21674 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
21675 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f32(float32x4_t a)21676 poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
21677 return vreinterpretq_p16_f32(a);
21678 }
21679
21680 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f64(<2 x double> %a) #0 {
21681 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
21682 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f64(float64x2_t a)21683 poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
21684 return vreinterpretq_p16_f64(a);
21685 }
21686
21687 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
21688 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
21689 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_p8(poly8x16_t a)21690 poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
21691 return vreinterpretq_p16_p8(a);
21692 }
21693
21694 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p64(<2 x i64> %a) #0 {
21695 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
21696 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_p64(poly64x2_t a)21697 poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
21698 return vreinterpretq_p16_p64(a);
21699 }
21700
21701 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s8(<16 x i8> %a) #0 {
21702 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
21703 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_s8(int8x16_t a)21704 poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
21705 return vreinterpretq_p64_s8(a);
21706 }
21707
21708 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s16(<8 x i16> %a) #0 {
21709 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
21710 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_s16(int16x8_t a)21711 poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
21712 return vreinterpretq_p64_s16(a);
21713 }
21714
21715 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s32(<4 x i32> %a) #0 {
21716 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
21717 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_s32(int32x4_t a)21718 poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
21719 return vreinterpretq_p64_s32(a);
21720 }
21721
21722 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s64(<2 x i64> %a) #0 {
21723 // CHECK: ret <2 x i64> %a
test_vreinterpretq_p64_s64(int64x2_t a)21724 poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
21725 return vreinterpretq_p64_s64(a);
21726 }
21727
21728 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u8(<16 x i8> %a) #0 {
21729 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
21730 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_u8(uint8x16_t a)21731 poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
21732 return vreinterpretq_p64_u8(a);
21733 }
21734
21735 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u16(<8 x i16> %a) #0 {
21736 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
21737 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_u16(uint16x8_t a)21738 poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
21739 return vreinterpretq_p64_u16(a);
21740 }
21741
21742 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u32(<4 x i32> %a) #0 {
21743 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
21744 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_u32(uint32x4_t a)21745 poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
21746 return vreinterpretq_p64_u32(a);
21747 }
21748
21749 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u64(<2 x i64> %a) #0 {
21750 // CHECK: ret <2 x i64> %a
test_vreinterpretq_p64_u64(uint64x2_t a)21751 poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
21752 return vreinterpretq_p64_u64(a);
21753 }
21754
21755 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f16(<8 x half> %a) #0 {
21756 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
21757 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_f16(float16x8_t a)21758 poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
21759 return vreinterpretq_p64_f16(a);
21760 }
21761
21762 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f32(<4 x float> %a) #0 {
21763 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
21764 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_f32(float32x4_t a)21765 poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
21766 return vreinterpretq_p64_f32(a);
21767 }
21768
21769 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f64(<2 x double> %a) #0 {
21770 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
21771 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_f64(float64x2_t a)21772 poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
21773 return vreinterpretq_p64_f64(a);
21774 }
21775
21776 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p8(<16 x i8> %a) #0 {
21777 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
21778 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_p8(poly8x16_t a)21779 poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
21780 return vreinterpretq_p64_p8(a);
21781 }
21782
21783 // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p16(<8 x i16> %a) #0 {
21784 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
21785 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_p16(poly16x8_t a)21786 poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
21787 return vreinterpretq_p64_p16(a);
21788 }
21789
21790 // CHECK-LABEL: define float @test_vabds_f32(float %a, float %b) #0 {
21791 // CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4
21792 // CHECK: ret float [[VABDS_F32_I]]
test_vabds_f32(float32_t a,float32_t b)21793 float32_t test_vabds_f32(float32_t a, float32_t b) {
21794 return vabds_f32(a, b);
21795 }
21796
21797 // CHECK-LABEL: define double @test_vabdd_f64(double %a, double %b) #0 {
21798 // CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4
21799 // CHECK: ret double [[VABDD_F64_I]]
test_vabdd_f64(float64_t a,float64_t b)21800 float64_t test_vabdd_f64(float64_t a, float64_t b) {
21801 return vabdd_f64(a, b);
21802 }
21803
21804 // CHECK-LABEL: define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
21805 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21806 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
21807 // CHECK: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21808 // CHECK: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
21809 // CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]]) #4
21810 // CHECK: ret <1 x i64> [[VUQADD2_I]]
test_vuqadd_s64(int64x1_t a,uint64x1_t b)21811 int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
21812 return vuqadd_s64(a, b);
21813 }
21814
21815 // CHECK-LABEL: define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
21816 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21817 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
21818 // CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21819 // CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
21820 // CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]]) #4
21821 // CHECK: ret <1 x i64> [[VSQADD2_I]]
test_vsqadd_u64(uint64x1_t a,int64x1_t b)21822 uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
21823 return vsqadd_u64(a, b);
21824 }
21825
21826 // CHECK-LABEL: define <8 x i8> @test_vsqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
21827 // CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
21828 // CHECK: ret <8 x i8> [[VSQADD_I]]
test_vsqadd_u8(uint8x8_t a,int8x8_t b)21829 uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
21830 return vsqadd_u8(a, b);
21831 }
21832
21833 // CHECK-LABEL: define <16 x i8> @test_vsqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
21834 // CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
21835 // CHECK: ret <16 x i8> [[VSQADD_I]]
test_vsqaddq_u8(uint8x16_t a,int8x16_t b)21836 uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
21837 return vsqaddq_u8(a, b);
21838 }
21839
21840 // CHECK-LABEL: define <4 x i16> @test_vsqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
21841 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
21842 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
21843 // CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
21844 // CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
21845 // CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]]) #4
21846 // CHECK: ret <4 x i16> [[VSQADD2_I]]
test_vsqadd_u16(uint16x4_t a,int16x4_t b)21847 uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
21848 return vsqadd_u16(a, b);
21849 }
21850
21851 // CHECK-LABEL: define <8 x i16> @test_vsqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
21852 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21853 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21854 // CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
21855 // CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
21856 // CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]]) #4
21857 // CHECK: ret <8 x i16> [[VSQADD2_I]]
test_vsqaddq_u16(uint16x8_t a,int16x8_t b)21858 uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
21859 return vsqaddq_u16(a, b);
21860 }
21861
21862 // CHECK-LABEL: define <2 x i32> @test_vsqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
21863 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
21864 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
21865 // CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
21866 // CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
21867 // CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]]) #4
21868 // CHECK: ret <2 x i32> [[VSQADD2_I]]
test_vsqadd_u32(uint32x2_t a,int32x2_t b)21869 uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
21870 return vsqadd_u32(a, b);
21871 }
21872
21873 // CHECK-LABEL: define <4 x i32> @test_vsqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
21874 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21875 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
21876 // CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
21877 // CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
21878 // CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]]) #4
21879 // CHECK: ret <4 x i32> [[VSQADD2_I]]
test_vsqaddq_u32(uint32x4_t a,int32x4_t b)21880 uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
21881 return vsqaddq_u32(a, b);
21882 }
21883
21884 // CHECK-LABEL: define <2 x i64> @test_vsqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
21885 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
21886 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
21887 // CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
21888 // CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
21889 // CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]]) #4
21890 // CHECK: ret <2 x i64> [[VSQADD2_I]]
test_vsqaddq_u64(uint64x2_t a,int64x2_t b)21891 uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
21892 return vsqaddq_u64(a, b);
21893 }
21894
21895 // CHECK-LABEL: define <1 x i64> @test_vabs_s64(<1 x i64> %a) #0 {
21896 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21897 // CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21898 // CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]]) #4
21899 // CHECK: ret <1 x i64> [[VABS1_I]]
test_vabs_s64(int64x1_t a)21900 int64x1_t test_vabs_s64(int64x1_t a) {
21901 return vabs_s64(a);
21902 }
21903
21904 // CHECK-LABEL: define <1 x i64> @test_vqabs_s64(<1 x i64> %a) #0 {
21905 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21906 // CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21907 // CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]]) #4
21908 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
21909 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <1 x i64>
21910 // CHECK: ret <1 x i64> [[TMP1]]
test_vqabs_s64(int64x1_t a)21911 int64x1_t test_vqabs_s64(int64x1_t a) {
21912 return vqabs_s64(a);
21913 }
21914
21915 // CHECK-LABEL: define <1 x i64> @test_vqneg_s64(<1 x i64> %a) #0 {
21916 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21917 // CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21918 // CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]]) #4
21919 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
21920 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <1 x i64>
21921 // CHECK: ret <1 x i64> [[TMP1]]
test_vqneg_s64(int64x1_t a)21922 int64x1_t test_vqneg_s64(int64x1_t a) {
21923 return vqneg_s64(a);
21924 }
21925
21926 // CHECK-LABEL: define <1 x i64> @test_vneg_s64(<1 x i64> %a) #0 {
21927 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
21928 // CHECK: ret <1 x i64> [[SUB_I]]
test_vneg_s64(int64x1_t a)21929 int64x1_t test_vneg_s64(int64x1_t a) {
21930 return vneg_s64(a);
21931 }
21932
21933 // CHECK-LABEL: define float @test_vaddv_f32(<2 x float> %a) #0 {
21934 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
21935 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
21936 // CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[TMP1]]) #4
21937 // CHECK: ret float [[VADDV_F32_I]]
test_vaddv_f32(float32x2_t a)21938 float32_t test_vaddv_f32(float32x2_t a) {
21939 return vaddv_f32(a);
21940 }
21941
21942 // CHECK-LABEL: define float @test_vaddvq_f32(<4 x float> %a) #0 {
21943 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
21944 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
21945 // CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[TMP1]]) #4
21946 // CHECK: ret float [[VADDVQ_F32_I]]
test_vaddvq_f32(float32x4_t a)21947 float32_t test_vaddvq_f32(float32x4_t a) {
21948 return vaddvq_f32(a);
21949 }
21950
21951 // CHECK-LABEL: define double @test_vaddvq_f64(<2 x double> %a) #0 {
21952 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21953 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21954 // CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[TMP1]]) #4
21955 // CHECK: ret double [[VADDVQ_F64_I]]
test_vaddvq_f64(float64x2_t a)21956 float64_t test_vaddvq_f64(float64x2_t a) {
21957 return vaddvq_f64(a);
21958 }
21959
21960 // CHECK-LABEL: define float @test_vmaxv_f32(<2 x float> %a) #0 {
21961 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
21962 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
21963 // CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
21964 // CHECK: ret float [[VMAXV_F32_I]]
test_vmaxv_f32(float32x2_t a)21965 float32_t test_vmaxv_f32(float32x2_t a) {
21966 return vmaxv_f32(a);
21967 }
21968
21969 // CHECK-LABEL: define double @test_vmaxvq_f64(<2 x double> %a) #0 {
21970 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21971 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21972 // CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
21973 // CHECK: ret double [[VMAXVQ_F64_I]]
test_vmaxvq_f64(float64x2_t a)21974 float64_t test_vmaxvq_f64(float64x2_t a) {
21975 return vmaxvq_f64(a);
21976 }
21977
21978 // CHECK-LABEL: define float @test_vminv_f32(<2 x float> %a) #0 {
21979 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
21980 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
21981 // CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
21982 // CHECK: ret float [[VMINV_F32_I]]
test_vminv_f32(float32x2_t a)21983 float32_t test_vminv_f32(float32x2_t a) {
21984 return vminv_f32(a);
21985 }
21986
21987 // CHECK-LABEL: define double @test_vminvq_f64(<2 x double> %a) #0 {
21988 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21989 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21990 // CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
21991 // CHECK: ret double [[VMINVQ_F64_I]]
test_vminvq_f64(float64x2_t a)21992 float64_t test_vminvq_f64(float64x2_t a) {
21993 return vminvq_f64(a);
21994 }
21995
21996 // CHECK-LABEL: define double @test_vmaxnmvq_f64(<2 x double> %a) #0 {
21997 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21998 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21999 // CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
22000 // CHECK: ret double [[VMAXNMVQ_F64_I]]
test_vmaxnmvq_f64(float64x2_t a)22001 float64_t test_vmaxnmvq_f64(float64x2_t a) {
22002 return vmaxnmvq_f64(a);
22003 }
22004
22005 // CHECK-LABEL: define float @test_vmaxnmv_f32(<2 x float> %a) #0 {
22006 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
22007 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
22008 // CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
22009 // CHECK: ret float [[VMAXNMV_F32_I]]
test_vmaxnmv_f32(float32x2_t a)22010 float32_t test_vmaxnmv_f32(float32x2_t a) {
22011 return vmaxnmv_f32(a);
22012 }
22013
22014 // CHECK-LABEL: define double @test_vminnmvq_f64(<2 x double> %a) #0 {
22015 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
22016 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
22017 // CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
22018 // CHECK: ret double [[VMINNMVQ_F64_I]]
test_vminnmvq_f64(float64x2_t a)22019 float64_t test_vminnmvq_f64(float64x2_t a) {
22020 return vminnmvq_f64(a);
22021 }
22022
22023 // CHECK-LABEL: define float @test_vminnmv_f32(<2 x float> %a) #0 {
22024 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
22025 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
22026 // CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
22027 // CHECK: ret float [[VMINNMV_F32_I]]
test_vminnmv_f32(float32x2_t a)22028 float32_t test_vminnmv_f32(float32x2_t a) {
22029 return vminnmv_f32(a);
22030 }
22031
22032 // CHECK-LABEL: define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
22033 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22034 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
22035 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22036 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
22037 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
22038 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
22039 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
22040 // CHECK: ret <2 x i64> [[TMP2]]
test_vpaddq_s64(int64x2_t a,int64x2_t b)22041 int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
22042 return vpaddq_s64(a, b);
22043 }
22044
22045 // CHECK-LABEL: define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
22046 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22047 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
22048 // CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22049 // CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
22050 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
22051 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
22052 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
22053 // CHECK: ret <2 x i64> [[TMP2]]
test_vpaddq_u64(uint64x2_t a,uint64x2_t b)22054 uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
22055 return vpaddq_u64(a, b);
22056 }
22057
22058 // CHECK-LABEL: define i64 @test_vpaddd_u64(<2 x i64> %a) #0 {
22059 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22060 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22061 // CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
22062 // CHECK: ret i64 [[VPADDD_U64_I]]
test_vpaddd_u64(uint64x2_t a)22063 uint64_t test_vpaddd_u64(uint64x2_t a) {
22064 return vpaddd_u64(a);
22065 }
22066
22067 // CHECK-LABEL: define i64 @test_vaddvq_s64(<2 x i64> %a) #0 {
22068 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22069 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22070 // CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
22071 // CHECK: ret i64 [[VADDVQ_S64_I]]
test_vaddvq_s64(int64x2_t a)22072 int64_t test_vaddvq_s64(int64x2_t a) {
22073 return vaddvq_s64(a);
22074 }
22075
22076 // CHECK-LABEL: define i64 @test_vaddvq_u64(<2 x i64> %a) #0 {
22077 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22078 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22079 // CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
22080 // CHECK: ret i64 [[VADDVQ_U64_I]]
test_vaddvq_u64(uint64x2_t a)22081 uint64_t test_vaddvq_u64(uint64x2_t a) {
22082 return vaddvq_u64(a);
22083 }
22084
22085 // CHECK-LABEL: define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) #0 {
22086 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
22087 // CHECK: ret <1 x double> [[ADD_I]]
test_vadd_f64(float64x1_t a,float64x1_t b)22088 float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
22089 return vadd_f64(a, b);
22090 }
22091
22092 // CHECK-LABEL: define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) #0 {
22093 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
22094 // CHECK: ret <1 x double> [[MUL_I]]
test_vmul_f64(float64x1_t a,float64x1_t b)22095 float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
22096 return vmul_f64(a, b);
22097 }
22098
22099 // CHECK-LABEL: define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) #0 {
22100 // CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
22101 // CHECK: ret <1 x double> [[DIV_I]]
test_vdiv_f64(float64x1_t a,float64x1_t b)22102 float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
22103 return vdiv_f64(a, b);
22104 }
22105
22106 // CHECK-LABEL: define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22107 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
22108 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
22109 // CHECK: ret <1 x double> [[ADD_I]]
test_vmla_f64(float64x1_t a,float64x1_t b,float64x1_t c)22110 float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
22111 return vmla_f64(a, b, c);
22112 }
22113
22114 // CHECK-LABEL: define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22115 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
22116 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
22117 // CHECK: ret <1 x double> [[SUB_I]]
test_vmls_f64(float64x1_t a,float64x1_t b,float64x1_t c)22118 float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
22119 return vmls_f64(a, b, c);
22120 }
22121
22122 // CHECK-LABEL: define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22123 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22124 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22125 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
22126 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22127 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22128 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
22129 // CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
22130 // CHECK: ret <1 x double> [[TMP6]]
test_vfma_f64(float64x1_t a,float64x1_t b,float64x1_t c)22131 float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
22132 return vfma_f64(a, b, c);
22133 }
22134
22135 // CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22136 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
22137 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22138 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
22139 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
22140 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22141 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22142 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
22143 // CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
22144 // CHECK: ret <1 x double> [[TMP6]]
test_vfms_f64(float64x1_t a,float64x1_t b,float64x1_t c)22145 float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
22146 return vfms_f64(a, b, c);
22147 }
22148
22149 // CHECK-LABEL: define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) #0 {
22150 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
22151 // CHECK: ret <1 x double> [[SUB_I]]
test_vsub_f64(float64x1_t a,float64x1_t b)22152 float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
22153 return vsub_f64(a, b);
22154 }
22155
22156 // CHECK-LABEL: define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) #0 {
22157 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22158 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22159 // CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22160 // CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22161 // CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]]) #4
22162 // CHECK: ret <1 x double> [[VABD2_I]]
test_vabd_f64(float64x1_t a,float64x1_t b)22163 float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
22164 return vabd_f64(a, b);
22165 }
22166
22167 // CHECK-LABEL: define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) #0 {
22168 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22169 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22170 // CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22171 // CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22172 // CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]]) #4
22173 // CHECK: ret <1 x double> [[VMAX2_I]]
test_vmax_f64(float64x1_t a,float64x1_t b)22174 float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
22175 return vmax_f64(a, b);
22176 }
22177
22178 // CHECK-LABEL: define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) #0 {
22179 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22180 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22181 // CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22182 // CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22183 // CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]]) #4
22184 // CHECK: ret <1 x double> [[VMIN2_I]]
test_vmin_f64(float64x1_t a,float64x1_t b)22185 float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
22186 return vmin_f64(a, b);
22187 }
22188
22189 // CHECK-LABEL: define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) #0 {
22190 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22191 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22192 // CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22193 // CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22194 // CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]]) #4
22195 // CHECK: ret <1 x double> [[VMAXNM2_I]]
test_vmaxnm_f64(float64x1_t a,float64x1_t b)22196 float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
22197 return vmaxnm_f64(a, b);
22198 }
22199
22200 // CHECK-LABEL: define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) #0 {
22201 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22202 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22203 // CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22204 // CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22205 // CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]]) #4
22206 // CHECK: ret <1 x double> [[VMINNM2_I]]
test_vminnm_f64(float64x1_t a,float64x1_t b)22207 float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
22208 return vminnm_f64(a, b);
22209 }
22210
22211 // CHECK-LABEL: define <1 x double> @test_vabs_f64(<1 x double> %a) #0 {
22212 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22213 // CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22214 // CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]]) #4
22215 // CHECK: ret <1 x double> [[VABS1_I]]
test_vabs_f64(float64x1_t a)22216 float64x1_t test_vabs_f64(float64x1_t a) {
22217 return vabs_f64(a);
22218 }
22219
22220 // CHECK-LABEL: define <1 x double> @test_vneg_f64(<1 x double> %a) #0 {
22221 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
22222 // CHECK: ret <1 x double> [[SUB_I]]
test_vneg_f64(float64x1_t a)22223 float64x1_t test_vneg_f64(float64x1_t a) {
22224 return vneg_f64(a);
22225 }
22226
22227 // CHECK-LABEL: define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) #0 {
22228 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22229 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22230 // CHECK: [[TMP2:%.*]] = fptosi <1 x double> [[TMP1]] to <1 x i64>
22231 // CHECK: ret <1 x i64> [[TMP2]]
test_vcvt_s64_f64(float64x1_t a)22232 int64x1_t test_vcvt_s64_f64(float64x1_t a) {
22233 return vcvt_s64_f64(a);
22234 }
22235
22236 // CHECK-LABEL: define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) #0 {
22237 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22238 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22239 // CHECK: [[TMP2:%.*]] = fptoui <1 x double> [[TMP1]] to <1 x i64>
22240 // CHECK: ret <1 x i64> [[TMP2]]
test_vcvt_u64_f64(float64x1_t a)22241 uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
22242 return vcvt_u64_f64(a);
22243 }
22244
22245 // CHECK-LABEL: define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) #0 {
22246 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22247 // CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22248 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
22249 // CHECK: ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_s64_f64(float64x1_t a)22250 int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
22251 return vcvtn_s64_f64(a);
22252 }
22253
22254 // CHECK-LABEL: define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) #0 {
22255 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22256 // CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22257 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
22258 // CHECK: ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_u64_f64(float64x1_t a)22259 uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
22260 return vcvtn_u64_f64(a);
22261 }
22262
22263 // CHECK-LABEL: define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) #0 {
22264 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22265 // CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22266 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
22267 // CHECK: ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_s64_f64(float64x1_t a)22268 int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
22269 return vcvtp_s64_f64(a);
22270 }
22271
22272 // CHECK-LABEL: define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) #0 {
22273 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22274 // CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22275 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
22276 // CHECK: ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_u64_f64(float64x1_t a)22277 uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
22278 return vcvtp_u64_f64(a);
22279 }
22280
22281 // CHECK-LABEL: define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) #0 {
22282 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22283 // CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22284 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
22285 // CHECK: ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_s64_f64(float64x1_t a)22286 int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
22287 return vcvtm_s64_f64(a);
22288 }
22289
22290 // CHECK-LABEL: define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) #0 {
22291 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22292 // CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22293 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
22294 // CHECK: ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_u64_f64(float64x1_t a)22295 uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
22296 return vcvtm_u64_f64(a);
22297 }
22298
22299 // CHECK-LABEL: define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) #0 {
22300 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22301 // CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22302 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
22303 // CHECK: ret <1 x i64> [[VCVTA1_I]]
test_vcvta_s64_f64(float64x1_t a)22304 int64x1_t test_vcvta_s64_f64(float64x1_t a) {
22305 return vcvta_s64_f64(a);
22306 }
22307
22308 // CHECK-LABEL: define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) #0 {
22309 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22310 // CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22311 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
22312 // CHECK: ret <1 x i64> [[VCVTA1_I]]
test_vcvta_u64_f64(float64x1_t a)22313 uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
22314 return vcvta_u64_f64(a);
22315 }
22316
22317 // CHECK-LABEL: define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) #0 {
22318 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22319 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22320 // CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
22321 // CHECK: ret <1 x double> [[VCVT_I]]
test_vcvt_f64_s64(int64x1_t a)22322 float64x1_t test_vcvt_f64_s64(int64x1_t a) {
22323 return vcvt_f64_s64(a);
22324 }
22325
22326 // CHECK-LABEL: define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) #0 {
22327 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22328 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22329 // CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
22330 // CHECK: ret <1 x double> [[VCVT_I]]
test_vcvt_f64_u64(uint64x1_t a)22331 float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
22332 return vcvt_f64_u64(a);
22333 }
22334
22335 // CHECK-LABEL: define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) #0 {
22336 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22337 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22338 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
22339 // CHECK: ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_s64_f64(float64x1_t a)22340 int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
22341 return vcvt_n_s64_f64(a, 64);
22342 }
22343
22344 // CHECK-LABEL: define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) #0 {
22345 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22346 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22347 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
22348 // CHECK: ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_u64_f64(float64x1_t a)22349 uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
22350 return vcvt_n_u64_f64(a, 64);
22351 }
22352
22353 // CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) #0 {
22354 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22355 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22356 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
22357 // CHECK: ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_s64(int64x1_t a)22358 float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
22359 return vcvt_n_f64_s64(a, 64);
22360 }
22361
22362 // CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) #0 {
22363 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22364 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22365 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
22366 // CHECK: ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_u64(uint64x1_t a)22367 float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
22368 return vcvt_n_f64_u64(a, 64);
22369 }
22370
22371 // CHECK-LABEL: define <1 x double> @test_vrndn_f64(<1 x double> %a) #0 {
22372 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22373 // CHECK: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22374 // CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> [[VRNDN_I]]) #4
22375 // CHECK: ret <1 x double> [[VRNDN1_I]]
test_vrndn_f64(float64x1_t a)22376 float64x1_t test_vrndn_f64(float64x1_t a) {
22377 return vrndn_f64(a);
22378 }
22379
22380 // CHECK-LABEL: define <1 x double> @test_vrnda_f64(<1 x double> %a) #0 {
22381 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22382 // CHECK: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22383 // CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]]) #4
22384 // CHECK: ret <1 x double> [[VRNDA1_I]]
test_vrnda_f64(float64x1_t a)22385 float64x1_t test_vrnda_f64(float64x1_t a) {
22386 return vrnda_f64(a);
22387 }
22388
22389 // CHECK-LABEL: define <1 x double> @test_vrndp_f64(<1 x double> %a) #0 {
22390 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22391 // CHECK: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22392 // CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]]) #4
22393 // CHECK: ret <1 x double> [[VRNDP1_I]]
test_vrndp_f64(float64x1_t a)22394 float64x1_t test_vrndp_f64(float64x1_t a) {
22395 return vrndp_f64(a);
22396 }
22397
22398 // CHECK-LABEL: define <1 x double> @test_vrndm_f64(<1 x double> %a) #0 {
22399 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22400 // CHECK: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22401 // CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]]) #4
22402 // CHECK: ret <1 x double> [[VRNDM1_I]]
test_vrndm_f64(float64x1_t a)22403 float64x1_t test_vrndm_f64(float64x1_t a) {
22404 return vrndm_f64(a);
22405 }
22406
22407 // CHECK-LABEL: define <1 x double> @test_vrndx_f64(<1 x double> %a) #0 {
22408 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22409 // CHECK: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22410 // CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]]) #4
22411 // CHECK: ret <1 x double> [[VRNDX1_I]]
test_vrndx_f64(float64x1_t a)22412 float64x1_t test_vrndx_f64(float64x1_t a) {
22413 return vrndx_f64(a);
22414 }
22415
22416 // CHECK-LABEL: define <1 x double> @test_vrnd_f64(<1 x double> %a) #0 {
22417 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22418 // CHECK: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22419 // CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]]) #4
22420 // CHECK: ret <1 x double> [[VRNDZ1_I]]
test_vrnd_f64(float64x1_t a)22421 float64x1_t test_vrnd_f64(float64x1_t a) {
22422 return vrnd_f64(a);
22423 }
22424
22425 // CHECK-LABEL: define <1 x double> @test_vrndi_f64(<1 x double> %a) #0 {
22426 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22427 // CHECK: [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22428 // CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_I]]) #4
22429 // CHECK: ret <1 x double> [[VRNDI1_I]]
test_vrndi_f64(float64x1_t a)22430 float64x1_t test_vrndi_f64(float64x1_t a) {
22431 return vrndi_f64(a);
22432 }
22433
22434 // CHECK-LABEL: define <1 x double> @test_vrsqrte_f64(<1 x double> %a) #0 {
22435 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22436 // CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22437 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]]) #4
22438 // CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
test_vrsqrte_f64(float64x1_t a)22439 float64x1_t test_vrsqrte_f64(float64x1_t a) {
22440 return vrsqrte_f64(a);
22441 }
22442
22443 // CHECK-LABEL: define <1 x double> @test_vrecpe_f64(<1 x double> %a) #0 {
22444 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22445 // CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22446 // CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]]) #4
22447 // CHECK: ret <1 x double> [[VRECPE_V1_I]]
test_vrecpe_f64(float64x1_t a)22448 float64x1_t test_vrecpe_f64(float64x1_t a) {
22449 return vrecpe_f64(a);
22450 }
22451
22452 // CHECK-LABEL: define <1 x double> @test_vsqrt_f64(<1 x double> %a) #0 {
22453 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22454 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22455 // CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP1]]) #4
22456 // CHECK: ret <1 x double> [[VSQRT_I]]
test_vsqrt_f64(float64x1_t a)22457 float64x1_t test_vsqrt_f64(float64x1_t a) {
22458 return vsqrt_f64(a);
22459 }
22460
22461 // CHECK-LABEL: define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) #0 {
22462 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22463 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22464 // CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22465 // CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22466 // CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]]) #4
22467 // CHECK: [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8>
22468 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <1 x double>
22469 // CHECK: ret <1 x double> [[TMP2]]
test_vrecps_f64(float64x1_t a,float64x1_t b)22470 float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
22471 return vrecps_f64(a, b);
22472 }
22473
22474 // CHECK-LABEL: define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) #0 {
22475 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22476 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22477 // CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22478 // CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22479 // CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]]) #4
22480 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
22481 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <1 x double>
22482 // CHECK: ret <1 x double> [[TMP2]]
test_vrsqrts_f64(float64x1_t a,float64x1_t b)22483 float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
22484 return vrsqrts_f64(a, b);
22485 }
22486
22487 // CHECK-LABEL: define i32 @test_vminv_s32(<2 x i32> %a) #0 {
22488 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22489 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22490 // CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
22491 // CHECK: ret i32 [[VMINV_S32_I]]
test_vminv_s32(int32x2_t a)22492 int32_t test_vminv_s32(int32x2_t a) {
22493 return vminv_s32(a);
22494 }
22495
22496 // CHECK-LABEL: define i32 @test_vminv_u32(<2 x i32> %a) #0 {
22497 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22498 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22499 // CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
22500 // CHECK: ret i32 [[VMINV_U32_I]]
test_vminv_u32(uint32x2_t a)22501 uint32_t test_vminv_u32(uint32x2_t a) {
22502 return vminv_u32(a);
22503 }
22504
22505 // CHECK-LABEL: define i32 @test_vmaxv_s32(<2 x i32> %a) #0 {
22506 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22507 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22508 // CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
22509 // CHECK: ret i32 [[VMAXV_S32_I]]
test_vmaxv_s32(int32x2_t a)22510 int32_t test_vmaxv_s32(int32x2_t a) {
22511 return vmaxv_s32(a);
22512 }
22513
22514 // CHECK-LABEL: define i32 @test_vmaxv_u32(<2 x i32> %a) #0 {
22515 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22516 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22517 // CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
22518 // CHECK: ret i32 [[VMAXV_U32_I]]
test_vmaxv_u32(uint32x2_t a)22519 uint32_t test_vmaxv_u32(uint32x2_t a) {
22520 return vmaxv_u32(a);
22521 }
22522
22523 // CHECK-LABEL: define i32 @test_vaddv_s32(<2 x i32> %a) #0 {
22524 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22525 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22526 // CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
22527 // CHECK: ret i32 [[VADDV_S32_I]]
test_vaddv_s32(int32x2_t a)22528 int32_t test_vaddv_s32(int32x2_t a) {
22529 return vaddv_s32(a);
22530 }
22531
22532 // CHECK-LABEL: define i32 @test_vaddv_u32(<2 x i32> %a) #0 {
22533 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22534 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22535 // CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
22536 // CHECK: ret i32 [[VADDV_U32_I]]
test_vaddv_u32(uint32x2_t a)22537 uint32_t test_vaddv_u32(uint32x2_t a) {
22538 return vaddv_u32(a);
22539 }
22540
22541 // CHECK-LABEL: define i64 @test_vaddlv_s32(<2 x i32> %a) #0 {
22542 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22543 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22544 // CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
22545 // CHECK: ret i64 [[VADDLV_S32_I]]
test_vaddlv_s32(int32x2_t a)22546 int64_t test_vaddlv_s32(int32x2_t a) {
22547 return vaddlv_s32(a);
22548 }
22549
22550 // CHECK-LABEL: define i64 @test_vaddlv_u32(<2 x i32> %a) #0 {
22551 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
22552 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
22553 // CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
22554 // CHECK: ret i64 [[VADDLV_U32_I]]
test_vaddlv_u32(uint32x2_t a)22555 uint64_t test_vaddlv_u32(uint32x2_t a) {
22556 return vaddlv_u32(a);
22557 }
22558