xref: /aosp_15_r20/external/clang/test/CodeGen/aarch64-neon-perm.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2*67e74705SXin Li // RUN:  -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
3*67e74705SXin Li 
4*67e74705SXin Li // Test new aarch64 intrinsics and types
5*67e74705SXin Li #include <arm_neon.h>
6*67e74705SXin Li 
7*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
8*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
9*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_s8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..14.
test_vuzp1_s8(int8x8_t a,int8x8_t b)10*67e74705SXin Li int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
11*67e74705SXin Li   return vuzp1_s8(a, b);
12*67e74705SXin Li }
13*67e74705SXin Li 
14*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
15*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
16*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_s8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..30.
test_vuzp1q_s8(int8x16_t a,int8x16_t b)17*67e74705SXin Li int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
18*67e74705SXin Li   return vuzp1q_s8(a, b);
19*67e74705SXin Li }
20*67e74705SXin Li 
21*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
22*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
23*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_s16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..6.
test_vuzp1_s16(int16x4_t a,int16x4_t b)24*67e74705SXin Li int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
25*67e74705SXin Li   return vuzp1_s16(a, b);
26*67e74705SXin Li }
27*67e74705SXin Li 
28*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
29*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
30*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_s16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..14.
test_vuzp1q_s16(int16x8_t a,int16x8_t b)31*67e74705SXin Li int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
32*67e74705SXin Li   return vuzp1q_s16(a, b);
33*67e74705SXin Li }
34*67e74705SXin Li 
35*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
36*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
37*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_s32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vuzp1_s32(int32x2_t a,int32x2_t b)38*67e74705SXin Li int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
39*67e74705SXin Li   return vuzp1_s32(a, b);
40*67e74705SXin Li }
41*67e74705SXin Li 
42*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
43*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
44*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_s32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..6.
test_vuzp1q_s32(int32x4_t a,int32x4_t b)45*67e74705SXin Li int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
46*67e74705SXin Li   return vuzp1q_s32(a, b);
47*67e74705SXin Li }
48*67e74705SXin Li 
49*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
50*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
51*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_s64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vuzp1q_s64(int64x2_t a,int64x2_t b)52*67e74705SXin Li int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
53*67e74705SXin Li   return vuzp1q_s64(a, b);
54*67e74705SXin Li }
55*67e74705SXin Li 
56*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
57*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
58*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_u8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..14.
test_vuzp1_u8(uint8x8_t a,uint8x8_t b)59*67e74705SXin Li uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
60*67e74705SXin Li   return vuzp1_u8(a, b);
61*67e74705SXin Li }
62*67e74705SXin Li 
63*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
64*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
65*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_u8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..30.
test_vuzp1q_u8(uint8x16_t a,uint8x16_t b)66*67e74705SXin Li uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
67*67e74705SXin Li   return vuzp1q_u8(a, b);
68*67e74705SXin Li }
69*67e74705SXin Li 
70*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
71*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
72*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_u16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..6.
test_vuzp1_u16(uint16x4_t a,uint16x4_t b)73*67e74705SXin Li uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
74*67e74705SXin Li   return vuzp1_u16(a, b);
75*67e74705SXin Li }
76*67e74705SXin Li 
77*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
78*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
79*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_u16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..14.
test_vuzp1q_u16(uint16x8_t a,uint16x8_t b)80*67e74705SXin Li uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
81*67e74705SXin Li   return vuzp1q_u16(a, b);
82*67e74705SXin Li }
83*67e74705SXin Li 
84*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
85*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
86*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_u32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vuzp1_u32(uint32x2_t a,uint32x2_t b)87*67e74705SXin Li uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
88*67e74705SXin Li   return vuzp1_u32(a, b);
89*67e74705SXin Li }
90*67e74705SXin Li 
91*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
92*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
93*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_u32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..6.
test_vuzp1q_u32(uint32x4_t a,uint32x4_t b)94*67e74705SXin Li uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
95*67e74705SXin Li   return vuzp1q_u32(a, b);
96*67e74705SXin Li }
97*67e74705SXin Li 
98*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
99*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
100*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_u64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vuzp1q_u64(uint64x2_t a,uint64x2_t b)101*67e74705SXin Li uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
102*67e74705SXin Li   return vuzp1q_u64(a, b);
103*67e74705SXin Li }
104*67e74705SXin Li 
105*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) #0 {
106*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
107*67e74705SXin Li // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_f32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vuzp1_f32(float32x2_t a,float32x2_t b)108*67e74705SXin Li float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
109*67e74705SXin Li   return vuzp1_f32(a, b);
110*67e74705SXin Li }
111*67e74705SXin Li 
112*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) #0 {
113*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
114*67e74705SXin Li // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_f32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..6.
test_vuzp1q_f32(float32x4_t a,float32x4_t b)115*67e74705SXin Li float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
116*67e74705SXin Li   return vuzp1q_f32(a, b);
117*67e74705SXin Li }
118*67e74705SXin Li 
119*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) #0 {
120*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
121*67e74705SXin Li // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_f64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vuzp1q_f64(float64x2_t a,float64x2_t b)122*67e74705SXin Li float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
123*67e74705SXin Li   return vuzp1q_f64(a, b);
124*67e74705SXin Li }
125*67e74705SXin Li 
126*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
127*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
128*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_p8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..14.
test_vuzp1_p8(poly8x8_t a,poly8x8_t b)129*67e74705SXin Li poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
130*67e74705SXin Li   return vuzp1_p8(a, b);
131*67e74705SXin Li }
132*67e74705SXin Li 
133*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
134*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
135*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_p8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..30.
test_vuzp1q_p8(poly8x16_t a,poly8x16_t b)136*67e74705SXin Li poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
137*67e74705SXin Li   return vuzp1q_p8(a, b);
138*67e74705SXin Li }
139*67e74705SXin Li 
140*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
141*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
142*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1_p16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..6.
test_vuzp1_p16(poly16x4_t a,poly16x4_t b)143*67e74705SXin Li poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
144*67e74705SXin Li   return vuzp1_p16(a, b);
145*67e74705SXin Li }
146*67e74705SXin Li 
147*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
148*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
149*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp1q_p16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with even mask indices 0..14.
test_vuzp1q_p16(poly16x8_t a,poly16x8_t b)150*67e74705SXin Li poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
151*67e74705SXin Li   return vuzp1q_p16(a, b);
152*67e74705SXin Li }
153*67e74705SXin Li 
154*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
155*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
156*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_s8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..15.
test_vuzp2_s8(int8x8_t a,int8x8_t b)157*67e74705SXin Li int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
158*67e74705SXin Li   return vuzp2_s8(a, b);
159*67e74705SXin Li }
160*67e74705SXin Li 
161*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
162*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
163*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_s8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..31.
test_vuzp2q_s8(int8x16_t a,int8x16_t b)164*67e74705SXin Li int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
165*67e74705SXin Li   return vuzp2q_s8(a, b);
166*67e74705SXin Li }
167*67e74705SXin Li 
168*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
169*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
170*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_s16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..7.
test_vuzp2_s16(int16x4_t a,int16x4_t b)171*67e74705SXin Li int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
172*67e74705SXin Li   return vuzp2_s16(a, b);
173*67e74705SXin Li }
174*67e74705SXin Li 
175*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
176*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
177*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_s16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..15.
test_vuzp2q_s16(int16x8_t a,int16x8_t b)178*67e74705SXin Li int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
179*67e74705SXin Li   return vuzp2q_s16(a, b);
180*67e74705SXin Li }
181*67e74705SXin Li 
182*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
183*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
184*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_s32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <1, 3>.
test_vuzp2_s32(int32x2_t a,int32x2_t b)185*67e74705SXin Li int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
186*67e74705SXin Li   return vuzp2_s32(a, b);
187*67e74705SXin Li }
188*67e74705SXin Li 
189*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
190*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
191*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_s32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..7.
test_vuzp2q_s32(int32x4_t a,int32x4_t b)192*67e74705SXin Li int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
193*67e74705SXin Li   return vuzp2q_s32(a, b);
194*67e74705SXin Li }
195*67e74705SXin Li 
196*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
197*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
198*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_s64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <1, 3>.
test_vuzp2q_s64(int64x2_t a,int64x2_t b)199*67e74705SXin Li int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
200*67e74705SXin Li   return vuzp2q_s64(a, b);
201*67e74705SXin Li }
202*67e74705SXin Li 
203*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
204*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
205*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_u8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..15.
test_vuzp2_u8(uint8x8_t a,uint8x8_t b)206*67e74705SXin Li uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
207*67e74705SXin Li   return vuzp2_u8(a, b);
208*67e74705SXin Li }
209*67e74705SXin Li 
210*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
211*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
212*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_u8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..31.
test_vuzp2q_u8(uint8x16_t a,uint8x16_t b)213*67e74705SXin Li uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
214*67e74705SXin Li   return vuzp2q_u8(a, b);
215*67e74705SXin Li }
216*67e74705SXin Li 
217*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
218*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
219*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_u16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..7.
test_vuzp2_u16(uint16x4_t a,uint16x4_t b)220*67e74705SXin Li uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
221*67e74705SXin Li   return vuzp2_u16(a, b);
222*67e74705SXin Li }
223*67e74705SXin Li 
224*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
225*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
226*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_u16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..15.
test_vuzp2q_u16(uint16x8_t a,uint16x8_t b)227*67e74705SXin Li uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
228*67e74705SXin Li   return vuzp2q_u16(a, b);
229*67e74705SXin Li }
230*67e74705SXin Li 
231*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
232*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
233*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_u32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <1, 3>.
test_vuzp2_u32(uint32x2_t a,uint32x2_t b)234*67e74705SXin Li uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
235*67e74705SXin Li   return vuzp2_u32(a, b);
236*67e74705SXin Li }
237*67e74705SXin Li 
238*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
239*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
240*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_u32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..7.
test_vuzp2q_u32(uint32x4_t a,uint32x4_t b)241*67e74705SXin Li uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
242*67e74705SXin Li   return vuzp2q_u32(a, b);
243*67e74705SXin Li }
244*67e74705SXin Li 
245*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
246*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
247*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_u64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <1, 3>.
test_vuzp2q_u64(uint64x2_t a,uint64x2_t b)248*67e74705SXin Li uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
249*67e74705SXin Li   return vuzp2q_u64(a, b);
250*67e74705SXin Li }
251*67e74705SXin Li 
252*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) #0 {
253*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
254*67e74705SXin Li // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_f32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <1, 3>.
test_vuzp2_f32(float32x2_t a,float32x2_t b)255*67e74705SXin Li float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
256*67e74705SXin Li   return vuzp2_f32(a, b);
257*67e74705SXin Li }
258*67e74705SXin Li 
259*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) #0 {
260*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
261*67e74705SXin Li // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_f32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..7.
test_vuzp2q_f32(float32x4_t a,float32x4_t b)262*67e74705SXin Li float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
263*67e74705SXin Li   return vuzp2q_f32(a, b);
264*67e74705SXin Li }
265*67e74705SXin Li 
266*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) #0 {
267*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
268*67e74705SXin Li // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_f64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <1, 3>.
test_vuzp2q_f64(float64x2_t a,float64x2_t b)269*67e74705SXin Li float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
270*67e74705SXin Li   return vuzp2q_f64(a, b);
271*67e74705SXin Li }
272*67e74705SXin Li 
273*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
274*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
275*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_p8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..15.
test_vuzp2_p8(poly8x8_t a,poly8x8_t b)276*67e74705SXin Li poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
277*67e74705SXin Li   return vuzp2_p8(a, b);
278*67e74705SXin Li }
279*67e74705SXin Li 
280*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
281*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
282*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_p8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..31.
test_vuzp2q_p8(poly8x16_t a,poly8x16_t b)283*67e74705SXin Li poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
284*67e74705SXin Li   return vuzp2q_p8(a, b);
285*67e74705SXin Li }
286*67e74705SXin Li 
287*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
288*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
289*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2_p16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..7.
test_vuzp2_p16(poly16x4_t a,poly16x4_t b)290*67e74705SXin Li poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
291*67e74705SXin Li   return vuzp2_p16(a, b);
292*67e74705SXin Li }
293*67e74705SXin Li 
294*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
295*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
296*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vuzp2q_p16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with odd mask indices 1..15.
test_vuzp2q_p16(poly16x8_t a,poly16x8_t b)297*67e74705SXin Li poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
298*67e74705SXin Li   return vuzp2q_p16(a, b);
299*67e74705SXin Li }
300*67e74705SXin Li 
301*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
302*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
303*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_s8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+8 for i = 0..3.
test_vzip1_s8(int8x8_t a,int8x8_t b)304*67e74705SXin Li int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
305*67e74705SXin Li   return vzip1_s8(a, b);
306*67e74705SXin Li }
307*67e74705SXin Li 
308*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
309*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
310*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_s8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+16 for i = 0..7.
test_vzip1q_s8(int8x16_t a,int8x16_t b)311*67e74705SXin Li int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
312*67e74705SXin Li   return vzip1q_s8(a, b);
313*67e74705SXin Li }
314*67e74705SXin Li 
315*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
316*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
317*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_s16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+4 for i = 0..1.
test_vzip1_s16(int16x4_t a,int16x4_t b)318*67e74705SXin Li int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
319*67e74705SXin Li   return vzip1_s16(a, b);
320*67e74705SXin Li }
321*67e74705SXin Li 
322*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
323*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
324*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_s16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+8 for i = 0..3.
test_vzip1q_s16(int16x8_t a,int16x8_t b)325*67e74705SXin Li int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
326*67e74705SXin Li   return vzip1q_s16(a, b);
327*67e74705SXin Li }
328*67e74705SXin Li 
329*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
330*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
331*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_s32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vzip1_s32(int32x2_t a,int32x2_t b)332*67e74705SXin Li int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
333*67e74705SXin Li   return vzip1_s32(a, b);
334*67e74705SXin Li }
335*67e74705SXin Li 
336*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
337*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
338*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_s32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+4 for i = 0..1.
test_vzip1q_s32(int32x4_t a,int32x4_t b)339*67e74705SXin Li int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
340*67e74705SXin Li   return vzip1q_s32(a, b);
341*67e74705SXin Li }
342*67e74705SXin Li 
343*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
344*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
345*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_s64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vzip1q_s64(int64x2_t a,int64x2_t b)346*67e74705SXin Li int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
347*67e74705SXin Li   return vzip1q_s64(a, b);
348*67e74705SXin Li }
349*67e74705SXin Li 
350*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
351*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
352*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_u8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+8 for i = 0..3.
test_vzip1_u8(uint8x8_t a,uint8x8_t b)353*67e74705SXin Li uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
354*67e74705SXin Li   return vzip1_u8(a, b);
355*67e74705SXin Li }
356*67e74705SXin Li 
357*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
358*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
359*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_u8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+16 for i = 0..7.
test_vzip1q_u8(uint8x16_t a,uint8x16_t b)360*67e74705SXin Li uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
361*67e74705SXin Li   return vzip1q_u8(a, b);
362*67e74705SXin Li }
363*67e74705SXin Li 
364*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
365*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
366*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_u16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+4 for i = 0..1.
test_vzip1_u16(uint16x4_t a,uint16x4_t b)367*67e74705SXin Li uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
368*67e74705SXin Li   return vzip1_u16(a, b);
369*67e74705SXin Li }
370*67e74705SXin Li 
371*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
372*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
373*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_u16 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+8 for i = 0..3.
test_vzip1q_u16(uint16x8_t a,uint16x8_t b)374*67e74705SXin Li uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
375*67e74705SXin Li   return vzip1q_u16(a, b);
376*67e74705SXin Li }
377*67e74705SXin Li 
378*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
379*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
380*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_u32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vzip1_u32(uint32x2_t a,uint32x2_t b)381*67e74705SXin Li uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
382*67e74705SXin Li   return vzip1_u32(a, b);
383*67e74705SXin Li }
384*67e74705SXin Li 
385*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
386*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
387*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_u32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+4 for i = 0..1.
test_vzip1q_u32(uint32x4_t a,uint32x4_t b)388*67e74705SXin Li uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
389*67e74705SXin Li   return vzip1q_u32(a, b);
390*67e74705SXin Li }
391*67e74705SXin Li 
392*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
393*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
394*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_u64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vzip1q_u64(uint64x2_t a,uint64x2_t b)395*67e74705SXin Li uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
396*67e74705SXin Li   return vzip1q_u64(a, b);
397*67e74705SXin Li }
398*67e74705SXin Li 
399*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) #0 {
400*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
401*67e74705SXin Li // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_f32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vzip1_f32(float32x2_t a,float32x2_t b)402*67e74705SXin Li float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
403*67e74705SXin Li   return vzip1_f32(a, b);
404*67e74705SXin Li }
405*67e74705SXin Li 
406*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) #0 {
407*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
408*67e74705SXin Li // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_f32 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+4 for i = 0..1.
test_vzip1q_f32(float32x4_t a,float32x4_t b)409*67e74705SXin Li float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
410*67e74705SXin Li   return vzip1q_f32(a, b);
411*67e74705SXin Li }
412*67e74705SXin Li 
413*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) #0 {
414*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
415*67e74705SXin Li // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_f64 and returns its result unchanged.
// Expected IR (comments above): one shufflevector with mask <0, 2>.
test_vzip1q_f64(float64x2_t a,float64x2_t b)416*67e74705SXin Li float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
417*67e74705SXin Li   return vzip1q_f64(a, b);
418*67e74705SXin Li }
419*67e74705SXin Li 
420*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
421*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
422*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1_p8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+8 for i = 0..3.
test_vzip1_p8(poly8x8_t a,poly8x8_t b)423*67e74705SXin Li poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
424*67e74705SXin Li   return vzip1_p8(a, b);
425*67e74705SXin Li }
426*67e74705SXin Li 
427*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
428*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
429*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Forwards (a, b) to vzip1q_p8 and returns its result unchanged.
// Expected IR (comments above): one shufflevector whose mask alternates i, i+16 for i = 0..7.
test_vzip1q_p8(poly8x16_t a,poly8x16_t b)430*67e74705SXin Li poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
431*67e74705SXin Li   return vzip1q_p8(a, b);
432*67e74705SXin Li }
433*67e74705SXin Li 
// Codegen test for vzip1_p16: poly16x4_t is expected to appear as <4 x i16> in the IR;
// the shufflevector mask selects lanes {a0, b0, a1, b1}, which is returned.
434*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
435*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
436*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vzip1_p16(poly16x4_t a,poly16x4_t b)437*67e74705SXin Li poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
438*67e74705SXin Li   return vzip1_p16(a, b);
439*67e74705SXin Li }
440*67e74705SXin Li 
// Codegen test for vzip1q_p16: poly16x8_t is expected to appear as <8 x i16> in the IR;
// the shufflevector mask selects lanes {a0, b0, a1, b1, a2, b2, a3, b3}, which is returned.
441*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
442*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
443*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vzip1q_p16(poly16x8_t a,poly16x8_t b)444*67e74705SXin Li poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
445*67e74705SXin Li   return vzip1q_p16(a, b);
446*67e74705SXin Li }
447*67e74705SXin Li 
// Codegen test for vzip2_s8: the expected shufflevector mask selects lanes
// {a4, b4, a5, b5, a6, b6, a7, b7} of the <8 x i8> operands, and that value is returned.
448*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
449*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
450*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vzip2_s8(int8x8_t a,int8x8_t b)451*67e74705SXin Li int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
452*67e74705SXin Li   return vzip2_s8(a, b);
453*67e74705SXin Li }
454*67e74705SXin Li 
// Codegen test for vzip2q_s8: the expected shufflevector mask selects lanes
// {a8, b8, a9, b9, ..., a15, b15} of the <16 x i8> operands, and that value is returned.
455*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
456*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
457*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vzip2q_s8(int8x16_t a,int8x16_t b)458*67e74705SXin Li int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
459*67e74705SXin Li   return vzip2q_s8(a, b);
460*67e74705SXin Li }
461*67e74705SXin Li 
// Codegen test for vzip2_s16: the expected shufflevector mask selects lanes
// {a2, b2, a3, b3} of the <4 x i16> operands, and that value is returned.
462*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
463*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
464*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vzip2_s16(int16x4_t a,int16x4_t b)465*67e74705SXin Li int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
466*67e74705SXin Li   return vzip2_s16(a, b);
467*67e74705SXin Li }
468*67e74705SXin Li 
// Codegen test for vzip2q_s16: the expected shufflevector mask selects lanes
// {a4, b4, a5, b5, a6, b6, a7, b7} of the <8 x i16> operands, and that value is returned.
469*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
470*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
471*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vzip2q_s16(int16x8_t a,int16x8_t b)472*67e74705SXin Li int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
473*67e74705SXin Li   return vzip2q_s16(a, b);
474*67e74705SXin Li }
475*67e74705SXin Li 
// Codegen test for vzip2_s32: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x i32> operands, and that value is returned.
476*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
477*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
478*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vzip2_s32(int32x2_t a,int32x2_t b)479*67e74705SXin Li int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
480*67e74705SXin Li   return vzip2_s32(a, b);
481*67e74705SXin Li }
482*67e74705SXin Li 
// Codegen test for vzip2q_s32: the expected shufflevector mask selects lanes
// {a2, b2, a3, b3} of the <4 x i32> operands, and that value is returned.
483*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
484*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
485*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vzip2q_s32(int32x4_t a,int32x4_t b)486*67e74705SXin Li int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
487*67e74705SXin Li   return vzip2q_s32(a, b);
488*67e74705SXin Li }
489*67e74705SXin Li 
// Codegen test for vzip2q_s64: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x i64> operands, and that value is returned.
490*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
491*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
492*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vzip2q_s64(int64x2_t a,int64x2_t b)493*67e74705SXin Li int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
494*67e74705SXin Li   return vzip2q_s64(a, b);
495*67e74705SXin Li }
496*67e74705SXin Li 
// Codegen test for vzip2_u8: uint8x8_t is expected to appear as <8 x i8> in the IR;
// the shufflevector mask selects lanes {a4, b4, a5, b5, a6, b6, a7, b7}, which is returned.
497*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
498*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
499*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vzip2_u8(uint8x8_t a,uint8x8_t b)500*67e74705SXin Li uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
501*67e74705SXin Li   return vzip2_u8(a, b);
502*67e74705SXin Li }
503*67e74705SXin Li 
// Codegen test for vzip2q_u8: uint8x16_t is expected to appear as <16 x i8> in the IR;
// the shufflevector mask selects lanes {a8, b8, a9, b9, ..., a15, b15}, which is returned.
504*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
505*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
506*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vzip2q_u8(uint8x16_t a,uint8x16_t b)507*67e74705SXin Li uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
508*67e74705SXin Li   return vzip2q_u8(a, b);
509*67e74705SXin Li }
510*67e74705SXin Li 
// Codegen test for vzip2_u16: uint16x4_t is expected to appear as <4 x i16> in the IR;
// the shufflevector mask selects lanes {a2, b2, a3, b3}, which is returned.
511*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
512*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
513*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vzip2_u16(uint16x4_t a,uint16x4_t b)514*67e74705SXin Li uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
515*67e74705SXin Li   return vzip2_u16(a, b);
516*67e74705SXin Li }
517*67e74705SXin Li 
// Codegen test for vzip2q_u16: uint16x8_t is expected to appear as <8 x i16> in the IR;
// the shufflevector mask selects lanes {a4, b4, a5, b5, a6, b6, a7, b7}, which is returned.
518*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
519*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
520*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vzip2q_u16(uint16x8_t a,uint16x8_t b)521*67e74705SXin Li uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
522*67e74705SXin Li   return vzip2q_u16(a, b);
523*67e74705SXin Li }
524*67e74705SXin Li 
// Codegen test for vzip2_u32: uint32x2_t is expected to appear as <2 x i32> in the IR;
// the shufflevector mask selects lanes {a1, b1}, which is returned.
525*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
526*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
527*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vzip2_u32(uint32x2_t a,uint32x2_t b)528*67e74705SXin Li uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
529*67e74705SXin Li   return vzip2_u32(a, b);
530*67e74705SXin Li }
531*67e74705SXin Li 
// Codegen test for vzip2q_u32: uint32x4_t is expected to appear as <4 x i32> in the IR;
// the shufflevector mask selects lanes {a2, b2, a3, b3}, which is returned.
532*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
533*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
534*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vzip2q_u32(uint32x4_t a,uint32x4_t b)535*67e74705SXin Li uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
536*67e74705SXin Li   return vzip2q_u32(a, b);
537*67e74705SXin Li }
538*67e74705SXin Li 
// Codegen test for vzip2q_u64: uint64x2_t is expected to appear as <2 x i64> in the IR;
// the shufflevector mask selects lanes {a1, b1}, which is returned.
539*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
540*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
541*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vzip2q_u64(uint64x2_t a,uint64x2_t b)542*67e74705SXin Li uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
543*67e74705SXin Li   return vzip2q_u64(a, b);
544*67e74705SXin Li }
545*67e74705SXin Li 
// Codegen test for vzip2_f32: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x float> operands, and that value is returned.
546*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) #0 {
547*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
548*67e74705SXin Li // CHECK:   ret <2 x float> [[SHUFFLE_I]]
test_vzip2_f32(float32x2_t a,float32x2_t b)549*67e74705SXin Li float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
550*67e74705SXin Li   return vzip2_f32(a, b);
551*67e74705SXin Li }
552*67e74705SXin Li 
// Codegen test for vzip2q_f32: the expected shufflevector mask selects lanes
// {a2, b2, a3, b3} of the <4 x float> operands, and that value is returned.
553*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) #0 {
554*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
555*67e74705SXin Li // CHECK:   ret <4 x float> [[SHUFFLE_I]]
test_vzip2q_f32(float32x4_t a,float32x4_t b)556*67e74705SXin Li float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
557*67e74705SXin Li   return vzip2q_f32(a, b);
558*67e74705SXin Li }
559*67e74705SXin Li 
// Codegen test for vzip2q_f64: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x double> operands, and that value is returned.
560*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) #0 {
561*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
562*67e74705SXin Li // CHECK:   ret <2 x double> [[SHUFFLE_I]]
test_vzip2q_f64(float64x2_t a,float64x2_t b)563*67e74705SXin Li float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
564*67e74705SXin Li   return vzip2q_f64(a, b);
565*67e74705SXin Li }
566*67e74705SXin Li 
// Codegen test for vzip2_p8: poly8x8_t is expected to appear as <8 x i8> in the IR;
// the shufflevector mask selects lanes {a4, b4, a5, b5, a6, b6, a7, b7}, which is returned.
567*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
568*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
569*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vzip2_p8(poly8x8_t a,poly8x8_t b)570*67e74705SXin Li poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
571*67e74705SXin Li   return vzip2_p8(a, b);
572*67e74705SXin Li }
573*67e74705SXin Li 
// Codegen test for vzip2q_p8: poly8x16_t is expected to appear as <16 x i8> in the IR;
// the shufflevector mask selects lanes {a8, b8, a9, b9, ..., a15, b15}, which is returned.
574*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
575*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
576*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vzip2q_p8(poly8x16_t a,poly8x16_t b)577*67e74705SXin Li poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
578*67e74705SXin Li   return vzip2q_p8(a, b);
579*67e74705SXin Li }
580*67e74705SXin Li 
// Codegen test for vzip2_p16: poly16x4_t is expected to appear as <4 x i16> in the IR;
// the shufflevector mask selects lanes {a2, b2, a3, b3}, which is returned.
581*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
582*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
583*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vzip2_p16(poly16x4_t a,poly16x4_t b)584*67e74705SXin Li poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
585*67e74705SXin Li   return vzip2_p16(a, b);
586*67e74705SXin Li }
587*67e74705SXin Li 
// Codegen test for vzip2q_p16: poly16x8_t is expected to appear as <8 x i16> in the IR;
// the shufflevector mask selects lanes {a4, b4, a5, b5, a6, b6, a7, b7}, which is returned.
588*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
589*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
590*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vzip2q_p16(poly16x8_t a,poly16x8_t b)591*67e74705SXin Li poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
592*67e74705SXin Li   return vzip2q_p16(a, b);
593*67e74705SXin Li }
594*67e74705SXin Li 
// Codegen test for vtrn1_s8: the expected shufflevector mask selects lanes
// {a0, b0, a2, b2, a4, b4, a6, b6} of the <8 x i8> operands, and that value is returned.
595*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
596*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
597*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn1_s8(int8x8_t a,int8x8_t b)598*67e74705SXin Li int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
599*67e74705SXin Li   return vtrn1_s8(a, b);
600*67e74705SXin Li }
601*67e74705SXin Li 
// Codegen test for vtrn1q_s8: the expected shufflevector mask selects lanes
// {a0, b0, a2, b2, ..., a14, b14} of the <16 x i8> operands, and that value is returned.
602*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
603*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
604*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn1q_s8(int8x16_t a,int8x16_t b)605*67e74705SXin Li int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
606*67e74705SXin Li   return vtrn1q_s8(a, b);
607*67e74705SXin Li }
608*67e74705SXin Li 
// Codegen test for vtrn1_s16: the expected shufflevector mask selects lanes
// {a0, b0, a2, b2} of the <4 x i16> operands, and that value is returned.
609*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
610*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
611*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn1_s16(int16x4_t a,int16x4_t b)612*67e74705SXin Li int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
613*67e74705SXin Li   return vtrn1_s16(a, b);
614*67e74705SXin Li }
615*67e74705SXin Li 
// Codegen test for vtrn1q_s16: the expected shufflevector mask selects lanes
// {a0, b0, a2, b2, a4, b4, a6, b6} of the <8 x i16> operands, and that value is returned.
616*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
617*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
618*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn1q_s16(int16x8_t a,int16x8_t b)619*67e74705SXin Li int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
620*67e74705SXin Li   return vtrn1q_s16(a, b);
621*67e74705SXin Li }
622*67e74705SXin Li 
// Codegen test for vtrn1_s32: the expected shufflevector mask selects lanes
// {a0, b0} of the <2 x i32> operands, and that value is returned.
623*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
624*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
625*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn1_s32(int32x2_t a,int32x2_t b)626*67e74705SXin Li int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
627*67e74705SXin Li   return vtrn1_s32(a, b);
628*67e74705SXin Li }
629*67e74705SXin Li 
// Codegen test for vtrn1q_s32: the expected shufflevector mask selects lanes
// {a0, b0, a2, b2} of the <4 x i32> operands, and that value is returned.
630*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
631*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
632*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn1q_s32(int32x4_t a,int32x4_t b)633*67e74705SXin Li int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
634*67e74705SXin Li   return vtrn1q_s32(a, b);
635*67e74705SXin Li }
636*67e74705SXin Li 
// Codegen test for vtrn1q_s64: the expected shufflevector mask selects lanes
// {a0, b0} of the <2 x i64> operands, and that value is returned.
637*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
638*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
639*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn1q_s64(int64x2_t a,int64x2_t b)640*67e74705SXin Li int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
641*67e74705SXin Li   return vtrn1q_s64(a, b);
642*67e74705SXin Li }
643*67e74705SXin Li 
// Codegen test for vtrn1_u8: uint8x8_t is expected to appear as <8 x i8> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2, a4, b4, a6, b6}, which is returned.
644*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
645*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
646*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn1_u8(uint8x8_t a,uint8x8_t b)647*67e74705SXin Li uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
648*67e74705SXin Li   return vtrn1_u8(a, b);
649*67e74705SXin Li }
650*67e74705SXin Li 
// Codegen test for vtrn1q_u8: uint8x16_t is expected to appear as <16 x i8> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2, ..., a14, b14}, which is returned.
651*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
652*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
653*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn1q_u8(uint8x16_t a,uint8x16_t b)654*67e74705SXin Li uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
655*67e74705SXin Li   return vtrn1q_u8(a, b);
656*67e74705SXin Li }
657*67e74705SXin Li 
// Codegen test for vtrn1_u16: uint16x4_t is expected to appear as <4 x i16> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2}, which is returned.
658*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
659*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
660*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn1_u16(uint16x4_t a,uint16x4_t b)661*67e74705SXin Li uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
662*67e74705SXin Li   return vtrn1_u16(a, b);
663*67e74705SXin Li }
664*67e74705SXin Li 
// Codegen test for vtrn1q_u16: uint16x8_t is expected to appear as <8 x i16> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2, a4, b4, a6, b6}, which is returned.
665*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
666*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
667*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn1q_u16(uint16x8_t a,uint16x8_t b)668*67e74705SXin Li uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
669*67e74705SXin Li   return vtrn1q_u16(a, b);
670*67e74705SXin Li }
671*67e74705SXin Li 
// Codegen test for vtrn1_u32: uint32x2_t is expected to appear as <2 x i32> in the IR;
// the shufflevector mask selects lanes {a0, b0}, which is returned.
672*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
673*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
674*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn1_u32(uint32x2_t a,uint32x2_t b)675*67e74705SXin Li uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
676*67e74705SXin Li   return vtrn1_u32(a, b);
677*67e74705SXin Li }
678*67e74705SXin Li 
// Codegen test for vtrn1q_u32: uint32x4_t is expected to appear as <4 x i32> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2}, which is returned.
679*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
680*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
681*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn1q_u32(uint32x4_t a,uint32x4_t b)682*67e74705SXin Li uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
683*67e74705SXin Li   return vtrn1q_u32(a, b);
684*67e74705SXin Li }
685*67e74705SXin Li 
// Codegen test for vtrn1q_u64: uint64x2_t is expected to appear as <2 x i64> in the IR;
// the shufflevector mask selects lanes {a0, b0}, which is returned.
686*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
687*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
688*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn1q_u64(uint64x2_t a,uint64x2_t b)689*67e74705SXin Li uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
690*67e74705SXin Li   return vtrn1q_u64(a, b);
691*67e74705SXin Li }
692*67e74705SXin Li 
// Codegen test for vtrn1_f32: the expected shufflevector mask selects lanes
// {a0, b0} of the <2 x float> operands, and that value is returned.
693*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) #0 {
694*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
695*67e74705SXin Li // CHECK:   ret <2 x float> [[SHUFFLE_I]]
test_vtrn1_f32(float32x2_t a,float32x2_t b)696*67e74705SXin Li float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
697*67e74705SXin Li   return vtrn1_f32(a, b);
698*67e74705SXin Li }
699*67e74705SXin Li 
// Codegen test for vtrn1q_f32: the expected shufflevector mask selects lanes
// {a0, b0, a2, b2} of the <4 x float> operands, and that value is returned.
700*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) #0 {
701*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
702*67e74705SXin Li // CHECK:   ret <4 x float> [[SHUFFLE_I]]
test_vtrn1q_f32(float32x4_t a,float32x4_t b)703*67e74705SXin Li float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
704*67e74705SXin Li   return vtrn1q_f32(a, b);
705*67e74705SXin Li }
706*67e74705SXin Li 
// Codegen test for vtrn1q_f64: the expected shufflevector mask selects lanes
// {a0, b0} of the <2 x double> operands, and that value is returned.
707*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) #0 {
708*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
709*67e74705SXin Li // CHECK:   ret <2 x double> [[SHUFFLE_I]]
test_vtrn1q_f64(float64x2_t a,float64x2_t b)710*67e74705SXin Li float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
711*67e74705SXin Li   return vtrn1q_f64(a, b);
712*67e74705SXin Li }
713*67e74705SXin Li 
// Codegen test for vtrn1_p8: poly8x8_t is expected to appear as <8 x i8> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2, a4, b4, a6, b6}, which is returned.
714*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
715*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
716*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn1_p8(poly8x8_t a,poly8x8_t b)717*67e74705SXin Li poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
718*67e74705SXin Li   return vtrn1_p8(a, b);
719*67e74705SXin Li }
720*67e74705SXin Li 
// Codegen test for vtrn1q_p8: poly8x16_t is expected to appear as <16 x i8> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2, ..., a14, b14}, which is returned.
721*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
722*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
723*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn1q_p8(poly8x16_t a,poly8x16_t b)724*67e74705SXin Li poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
725*67e74705SXin Li   return vtrn1q_p8(a, b);
726*67e74705SXin Li }
727*67e74705SXin Li 
// Codegen test for vtrn1_p16: poly16x4_t is expected to appear as <4 x i16> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2}, which is returned.
728*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
729*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
730*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn1_p16(poly16x4_t a,poly16x4_t b)731*67e74705SXin Li poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
732*67e74705SXin Li   return vtrn1_p16(a, b);
733*67e74705SXin Li }
734*67e74705SXin Li 
// Codegen test for vtrn1q_p16: poly16x8_t is expected to appear as <8 x i16> in the IR;
// the shufflevector mask selects lanes {a0, b0, a2, b2, a4, b4, a6, b6}, which is returned.
735*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
736*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
737*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn1q_p16(poly16x8_t a,poly16x8_t b)738*67e74705SXin Li poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
739*67e74705SXin Li   return vtrn1q_p16(a, b);
740*67e74705SXin Li }
741*67e74705SXin Li 
// Codegen test for vtrn2_s8: the expected shufflevector mask selects lanes
// {a1, b1, a3, b3, a5, b5, a7, b7} of the <8 x i8> operands, and that value is returned.
742*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
743*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
744*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn2_s8(int8x8_t a,int8x8_t b)745*67e74705SXin Li int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
746*67e74705SXin Li   return vtrn2_s8(a, b);
747*67e74705SXin Li }
748*67e74705SXin Li 
// Codegen test for vtrn2q_s8: the expected shufflevector mask selects lanes
// {a1, b1, a3, b3, ..., a15, b15} of the <16 x i8> operands, and that value is returned.
749*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
750*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
751*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn2q_s8(int8x16_t a,int8x16_t b)752*67e74705SXin Li int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
753*67e74705SXin Li   return vtrn2q_s8(a, b);
754*67e74705SXin Li }
755*67e74705SXin Li 
// Codegen test for vtrn2_s16: the expected shufflevector mask selects lanes
// {a1, b1, a3, b3} of the <4 x i16> operands, and that value is returned.
756*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
757*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
758*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn2_s16(int16x4_t a,int16x4_t b)759*67e74705SXin Li int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
760*67e74705SXin Li   return vtrn2_s16(a, b);
761*67e74705SXin Li }
762*67e74705SXin Li 
// Codegen test for vtrn2q_s16: the expected shufflevector mask selects lanes
// {a1, b1, a3, b3, a5, b5, a7, b7} of the <8 x i16> operands, and that value is returned.
763*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
764*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
765*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn2q_s16(int16x8_t a,int16x8_t b)766*67e74705SXin Li int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
767*67e74705SXin Li   return vtrn2q_s16(a, b);
768*67e74705SXin Li }
769*67e74705SXin Li 
// Codegen test for vtrn2_s32: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x i32> operands, and that value is returned.
770*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
771*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
772*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn2_s32(int32x2_t a,int32x2_t b)773*67e74705SXin Li int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
774*67e74705SXin Li   return vtrn2_s32(a, b);
775*67e74705SXin Li }
776*67e74705SXin Li 
// Codegen test for vtrn2q_s32: the expected shufflevector mask selects lanes
// {a1, b1, a3, b3} of the <4 x i32> operands, and that value is returned.
777*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
778*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
779*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn2q_s32(int32x4_t a,int32x4_t b)780*67e74705SXin Li int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
781*67e74705SXin Li   return vtrn2q_s32(a, b);
782*67e74705SXin Li }
783*67e74705SXin Li 
// Codegen test for vtrn2q_s64: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x i64> operands, and that value is returned.
784*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
785*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
786*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn2q_s64(int64x2_t a,int64x2_t b)787*67e74705SXin Li int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
788*67e74705SXin Li   return vtrn2q_s64(a, b);
789*67e74705SXin Li }
790*67e74705SXin Li 
// Codegen test for vtrn2_u8: uint8x8_t is expected to appear as <8 x i8> in the IR;
// the shufflevector mask selects lanes {a1, b1, a3, b3, a5, b5, a7, b7}, which is returned.
791*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
792*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
793*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn2_u8(uint8x8_t a,uint8x8_t b)794*67e74705SXin Li uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
795*67e74705SXin Li   return vtrn2_u8(a, b);
796*67e74705SXin Li }
797*67e74705SXin Li 
// Codegen test for vtrn2q_u8: uint8x16_t is expected to appear as <16 x i8> in the IR;
// the shufflevector mask selects lanes {a1, b1, a3, b3, ..., a15, b15}, which is returned.
798*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
799*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
800*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn2q_u8(uint8x16_t a,uint8x16_t b)801*67e74705SXin Li uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
802*67e74705SXin Li   return vtrn2q_u8(a, b);
803*67e74705SXin Li }
804*67e74705SXin Li 
// Codegen test for vtrn2_u16: uint16x4_t is expected to appear as <4 x i16> in the IR;
// the shufflevector mask selects lanes {a1, b1, a3, b3}, which is returned.
805*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
806*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
807*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn2_u16(uint16x4_t a,uint16x4_t b)808*67e74705SXin Li uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
809*67e74705SXin Li   return vtrn2_u16(a, b);
810*67e74705SXin Li }
811*67e74705SXin Li 
// Codegen test for vtrn2q_u16: uint16x8_t is expected to appear as <8 x i16> in the IR;
// the shufflevector mask selects lanes {a1, b1, a3, b3, a5, b5, a7, b7}, which is returned.
812*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
813*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
814*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn2q_u16(uint16x8_t a,uint16x8_t b)815*67e74705SXin Li uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
816*67e74705SXin Li   return vtrn2q_u16(a, b);
817*67e74705SXin Li }
818*67e74705SXin Li 
// Codegen test for vtrn2_u32: uint32x2_t is expected to appear as <2 x i32> in the IR;
// the shufflevector mask selects lanes {a1, b1}, which is returned.
819*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
820*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
821*67e74705SXin Li // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn2_u32(uint32x2_t a,uint32x2_t b)822*67e74705SXin Li uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
823*67e74705SXin Li   return vtrn2_u32(a, b);
824*67e74705SXin Li }
825*67e74705SXin Li 
// Codegen test for vtrn2q_u32: uint32x4_t is expected to appear as <4 x i32> in the IR;
// the shufflevector mask selects lanes {a1, b1, a3, b3}, which is returned.
826*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
827*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
828*67e74705SXin Li // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn2q_u32(uint32x4_t a,uint32x4_t b)829*67e74705SXin Li uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
830*67e74705SXin Li   return vtrn2q_u32(a, b);
831*67e74705SXin Li }
832*67e74705SXin Li 
// Codegen test for vtrn2q_u64: uint64x2_t is expected to appear as <2 x i64> in the IR;
// the shufflevector mask selects lanes {a1, b1}, which is returned.
833*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
834*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
835*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn2q_u64(uint64x2_t a,uint64x2_t b)836*67e74705SXin Li uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
837*67e74705SXin Li   return vtrn2q_u64(a, b);
838*67e74705SXin Li }
839*67e74705SXin Li 
// Codegen test for vtrn2_f32: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x float> operands, and that value is returned.
840*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) #0 {
841*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
842*67e74705SXin Li // CHECK:   ret <2 x float> [[SHUFFLE_I]]
test_vtrn2_f32(float32x2_t a,float32x2_t b)843*67e74705SXin Li float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
844*67e74705SXin Li   return vtrn2_f32(a, b);
845*67e74705SXin Li }
846*67e74705SXin Li 
// Codegen test for vtrn2q_f32: the expected shufflevector mask selects lanes
// {a1, b1, a3, b3} of the <4 x float> operands, and that value is returned.
847*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) #0 {
848*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
849*67e74705SXin Li // CHECK:   ret <4 x float> [[SHUFFLE_I]]
test_vtrn2q_f32(float32x4_t a,float32x4_t b)850*67e74705SXin Li float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
851*67e74705SXin Li   return vtrn2q_f32(a, b);
852*67e74705SXin Li }
853*67e74705SXin Li 
// Codegen test for vtrn2q_f64: the expected shufflevector mask selects lanes
// {a1, b1} of the <2 x double> operands, and that value is returned.
854*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) #0 {
855*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
856*67e74705SXin Li // CHECK:   ret <2 x double> [[SHUFFLE_I]]
test_vtrn2q_f64(float64x2_t a,float64x2_t b)857*67e74705SXin Li float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
858*67e74705SXin Li   return vtrn2q_f64(a, b);
859*67e74705SXin Li }
860*67e74705SXin Li 
861*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
862*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
863*67e74705SXin Li // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Codegen test wrapper: returns vtrn2_p8(a, b) unchanged.
// The FileCheck expectations directly above require a single shufflevector of
// %a and %b on <8 x i8> with mask <1, 9, 3, 11, 5, 13, 7, 15>, returned as-is.
test_vtrn2_p8(poly8x8_t a,poly8x8_t b)864*67e74705SXin Li poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
865*67e74705SXin Li   return vtrn2_p8(a, b);
866*67e74705SXin Li }
867*67e74705SXin Li 
868*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
869*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
870*67e74705SXin Li // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Codegen test wrapper: returns vtrn2q_p8(a, b) unchanged.
// The FileCheck expectations directly above require a single shufflevector of
// %a and %b on <16 x i8> whose mask alternates 1, 17, 3, 19, ... up to 15, 31,
// returned as-is.
test_vtrn2q_p8(poly8x16_t a,poly8x16_t b)871*67e74705SXin Li poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
872*67e74705SXin Li   return vtrn2q_p8(a, b);
873*67e74705SXin Li }
874*67e74705SXin Li 
875*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
876*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
877*67e74705SXin Li // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Codegen test wrapper: returns vtrn2_p16(a, b) unchanged.
// The FileCheck expectations directly above require a single shufflevector of
// %a and %b on <4 x i16> with mask <1, 5, 3, 7>, returned as-is.
test_vtrn2_p16(poly16x4_t a,poly16x4_t b)878*67e74705SXin Li poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
879*67e74705SXin Li   return vtrn2_p16(a, b);
880*67e74705SXin Li }
881*67e74705SXin Li 
882*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
883*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
884*67e74705SXin Li // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Codegen test wrapper: returns vtrn2q_p16(a, b) unchanged.
// The FileCheck expectations directly above require a single shufflevector of
// %a and %b on <8 x i16> with mask <1, 9, 3, 11, 5, 13, 7, 15>, returned as-is.
test_vtrn2q_p16(poly16x8_t a,poly16x8_t b)885*67e74705SXin Li poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
886*67e74705SXin Li   return vtrn2q_p16(a, b);
887*67e74705SXin Li }
888*67e74705SXin Li 
889*67e74705SXin Li // CHECK-LABEL: define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) #0 {
890*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
891*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
892*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
893*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
894*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
895*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
896*67e74705SXin Li // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
897*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
898*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
899*67e74705SXin Li // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
900*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
901*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
902*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
903*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
904*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
905*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
906*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
907*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
908*67e74705SXin Li // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Codegen test wrapper: returns the int8x8x2_t produced by vuzp_s8(a, b).
// The FileCheck expectations directly above require two shufflevectors of %a
// and %b on <8 x i8>: mask <0, 2, ..., 14> stored first, then mask
// <1, 3, ..., 15> stored in the following slot, before the struct is copied out.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_s8(int8x8_t a,int8x8_t b)909*67e74705SXin Li int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
910*67e74705SXin Li   return vuzp_s8(a, b);
911*67e74705SXin Li }
912*67e74705SXin Li 
913*67e74705SXin Li // CHECK-LABEL: define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) #0 {
914*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
915*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
916*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
917*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
918*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
919*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
920*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
921*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
922*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
923*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
924*67e74705SXin Li // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
925*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
926*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
927*67e74705SXin Li // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
928*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
929*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
930*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
931*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
932*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
933*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
934*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
935*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
936*67e74705SXin Li // CHECK:   ret %struct.int16x4x2_t [[TMP12]]
// Codegen test wrapper: returns the int16x4x2_t produced by vuzp_s16(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <8 x i8> and back to <4 x i16>, then two shufflevectors with masks
// <0, 2, 4, 6> and <1, 3, 5, 7> stored into consecutive result slots.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_s16(int16x4_t a,int16x4_t b)937*67e74705SXin Li int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
938*67e74705SXin Li   return vuzp_s16(a, b);
939*67e74705SXin Li }
940*67e74705SXin Li // CHECK-LABEL: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) #0 {
941*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
942*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
943*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
944*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
945*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
946*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
947*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
948*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
949*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
950*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
951*67e74705SXin Li // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
952*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
953*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
954*67e74705SXin Li // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
955*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
956*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
957*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
958*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
959*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
960*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
961*67e74705SXin Li // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
962*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
963*67e74705SXin Li // CHECK:   ret %struct.int32x2x2_t [[TMP12]]
// Codegen test wrapper: returns the int32x2x2_t produced by vuzp_s32(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <8 x i8> and back to <2 x i32>, then two shufflevectors with masks <0, 2>
// and <1, 3> stored into consecutive result slots.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_s32(int32x2_t a,int32x2_t b)964*67e74705SXin Li int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
965*67e74705SXin Li   return vuzp_s32(a, b);
966*67e74705SXin Li }
967*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) #0 {
968*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
969*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
970*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
971*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
972*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
973*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
974*67e74705SXin Li // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
975*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
976*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
977*67e74705SXin Li // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
978*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
979*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
980*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
981*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
982*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
983*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
984*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
985*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
986*67e74705SXin Li // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Codegen test wrapper: returns the uint8x8x2_t produced by vuzp_u8(a, b).
// The FileCheck expectations directly above require two shufflevectors of %a
// and %b on <8 x i8>: mask <0, 2, ..., 14> stored first, then mask
// <1, 3, ..., 15> stored in the following slot, before the struct is copied out.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_u8(uint8x8_t a,uint8x8_t b)987*67e74705SXin Li uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
988*67e74705SXin Li   return vuzp_u8(a, b);
989*67e74705SXin Li }
990*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) #0 {
991*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
992*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
993*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
994*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
995*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
996*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
997*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
998*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
999*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1000*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1001*67e74705SXin Li // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
1002*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1003*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1004*67e74705SXin Li // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
1005*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1006*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1007*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1008*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1009*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1010*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
1011*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1012*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1013*67e74705SXin Li // CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
// Codegen test wrapper: returns the uint16x4x2_t produced by vuzp_u16(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <8 x i8> and back to <4 x i16>, then two shufflevectors with masks
// <0, 2, 4, 6> and <1, 3, 5, 7> stored into consecutive result slots.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_u16(uint16x4_t a,uint16x4_t b)1014*67e74705SXin Li uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
1015*67e74705SXin Li   return vuzp_u16(a, b);
1016*67e74705SXin Li }
1017*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) #0 {
1018*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1019*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1020*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1021*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1022*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1023*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1024*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1025*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1026*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1027*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1028*67e74705SXin Li // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
1029*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1030*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1031*67e74705SXin Li // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
1032*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1033*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1034*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1035*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1036*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1037*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
1038*67e74705SXin Li // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1039*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1040*67e74705SXin Li // CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
// Codegen test wrapper: returns the uint32x2x2_t produced by vuzp_u32(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <8 x i8> and back to <2 x i32>, then two shufflevectors with masks <0, 2>
// and <1, 3> stored into consecutive result slots.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_u32(uint32x2_t a,uint32x2_t b)1041*67e74705SXin Li uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
1042*67e74705SXin Li   return vuzp_u32(a, b);
1043*67e74705SXin Li }
1044*67e74705SXin Li // CHECK-LABEL: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) #0 {
1045*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1046*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1047*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1048*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1049*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1050*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1051*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1052*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1053*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1054*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
1055*67e74705SXin Li // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
1056*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1057*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
1058*67e74705SXin Li // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
1059*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1060*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1061*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1062*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1063*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1064*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
1065*67e74705SXin Li // CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
1066*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1067*67e74705SXin Li // CHECK:   ret %struct.float32x2x2_t [[TMP12]]
// Codegen test wrapper: returns the float32x2x2_t produced by vuzp_f32(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <8 x i8> and back to <2 x float>, then two shufflevectors with masks <0, 2>
// and <1, 3> stored into consecutive result slots.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_f32(float32x2_t a,float32x2_t b)1068*67e74705SXin Li float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
1069*67e74705SXin Li   return vuzp_f32(a, b);
1070*67e74705SXin Li }
1071*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) #0 {
1072*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1073*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1074*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1075*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1076*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1077*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1078*67e74705SXin Li // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
1079*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1080*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1081*67e74705SXin Li // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
1082*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1083*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1084*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1085*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1086*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1087*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1088*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1089*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1090*67e74705SXin Li // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Codegen test wrapper: returns the poly8x8x2_t produced by vuzp_p8(a, b).
// The FileCheck expectations directly above require two shufflevectors of %a
// and %b on <8 x i8>: mask <0, 2, ..., 14> stored first, then mask
// <1, 3, ..., 15> stored in the following slot, before the struct is copied out.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_p8(poly8x8_t a,poly8x8_t b)1091*67e74705SXin Li poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
1092*67e74705SXin Li   return vuzp_p8(a, b);
1093*67e74705SXin Li }
1094*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) #0 {
1095*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1096*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1097*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1098*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1099*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1100*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1101*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1102*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1103*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1104*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1105*67e74705SXin Li // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
1106*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1107*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1108*67e74705SXin Li // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
1109*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1110*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1111*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1112*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1113*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1114*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
1115*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1116*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1117*67e74705SXin Li // CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
// Codegen test wrapper: returns the poly16x4x2_t produced by vuzp_p16(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <8 x i8> and back to <4 x i16>, then two shufflevectors with masks
// <0, 2, 4, 6> and <1, 3, 5, 7> stored into consecutive result slots.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzp_p16(poly16x4_t a,poly16x4_t b)1118*67e74705SXin Li poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
1119*67e74705SXin Li   return vuzp_p16(a, b);
1120*67e74705SXin Li }
1121*67e74705SXin Li // CHECK-LABEL: define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
1122*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1123*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1124*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1125*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1126*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1127*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1128*67e74705SXin Li // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1129*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1130*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1131*67e74705SXin Li // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1132*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1133*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1134*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1135*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1136*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1137*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1138*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1139*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1140*67e74705SXin Li // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Codegen test wrapper: returns the int8x16x2_t produced by vuzpq_s8(a, b).
// The FileCheck expectations directly above require two shufflevectors of %a
// and %b on <16 x i8>: mask <0, 2, ..., 30> stored first, then mask
// <1, 3, ..., 31> stored in the following slot, followed by a 32-byte memcpy
// of the 16-byte-aligned struct.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzpq_s8(int8x16_t a,int8x16_t b)1141*67e74705SXin Li int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
1142*67e74705SXin Li   return vuzpq_s8(a, b);
1143*67e74705SXin Li }
1144*67e74705SXin Li // CHECK-LABEL: define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
1145*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1146*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1147*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1148*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1149*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1150*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1151*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1152*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1153*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1154*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1155*67e74705SXin Li // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1156*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1157*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1158*67e74705SXin Li // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
1159*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1160*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1161*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1162*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1163*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1164*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
1165*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1166*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1167*67e74705SXin Li // CHECK:   ret %struct.int16x8x2_t [[TMP12]]
// Codegen test wrapper: returns the int16x8x2_t produced by vuzpq_s16(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <16 x i8> and back to <8 x i16>, then two shufflevectors with masks
// <0, 2, ..., 14> and <1, 3, ..., 15> stored into consecutive result slots,
// followed by a 32-byte memcpy of the 16-byte-aligned struct.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzpq_s16(int16x8_t a,int16x8_t b)1168*67e74705SXin Li int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
1169*67e74705SXin Li   return vuzpq_s16(a, b);
1170*67e74705SXin Li }
1171*67e74705SXin Li // CHECK-LABEL: define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
1172*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1173*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1174*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1175*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1176*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1177*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1178*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1179*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1180*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1181*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1182*67e74705SXin Li // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1183*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1184*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1185*67e74705SXin Li // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
1186*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1187*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1188*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1189*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1190*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1191*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
1192*67e74705SXin Li // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1193*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1194*67e74705SXin Li // CHECK:   ret %struct.int32x4x2_t [[TMP12]]
// Codegen test wrapper: returns the int32x4x2_t produced by vuzpq_s32(a, b).
// The FileCheck expectations directly above show both operands bitcast to
// <16 x i8> and back to <4 x i32>, then two shufflevectors with masks
// <0, 2, 4, 6> and <1, 3, 5, 7> stored into consecutive result slots,
// followed by a 32-byte memcpy of the 16-byte-aligned struct.
// NOTE(review): those expectations list the alloca/memcpy sequence explicitly,
// so changing this body likely means regenerating them.
test_vuzpq_s32(int32x4_t a,int32x4_t b)1195*67e74705SXin Li int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
1196*67e74705SXin Li   return vuzpq_s32(a, b);
1197*67e74705SXin Li }
1198*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
1199*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1200*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1201*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1202*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1203*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1204*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1205*67e74705SXin Li // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1206*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1207*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1208*67e74705SXin Li // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1209*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1210*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1211*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1212*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1213*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1214*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1215*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1216*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1217*67e74705SXin Li // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Codegen test for vuzpq_u8: passes a and b straight through and returns the
// uint8x16x2_t result. The FileCheck patterns above expect two <16 x i8>
// shuffles of %a and %b: mask 0,2,...,30 stored first, mask 1,3,...,31 second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vuzpq_u8(uint8x16_t a,uint8x16_t b)1218*67e74705SXin Li uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
1219*67e74705SXin Li   return vuzpq_u8(a, b);
1220*67e74705SXin Li }
1221*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
1222*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1223*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1224*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1225*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1226*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1227*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1228*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1229*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1230*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1231*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1232*67e74705SXin Li // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1233*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1234*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1235*67e74705SXin Li // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
1236*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1237*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1238*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1239*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1240*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1241*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
1242*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1243*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1244*67e74705SXin Li // CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
// Codegen test for vuzpq_u16: passes a and b straight through and returns the
// uint16x8x2_t result. The FileCheck patterns above expect two <8 x i16>
// shuffles: mask 0,2,...,14 stored first, mask 1,3,...,15 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vuzpq_u16(uint16x8_t a,uint16x8_t b)1245*67e74705SXin Li uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
1246*67e74705SXin Li   return vuzpq_u16(a, b);
1247*67e74705SXin Li }
1248*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
1249*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1250*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1251*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1252*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1253*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1254*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1255*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1256*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1257*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1258*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1259*67e74705SXin Li // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1260*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1261*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1262*67e74705SXin Li // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
1263*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1264*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1265*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1266*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1267*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1268*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
1269*67e74705SXin Li // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1270*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1271*67e74705SXin Li // CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
// Codegen test for vuzpq_u32: passes a and b straight through and returns the
// uint32x4x2_t result. The FileCheck patterns above expect two <4 x i32>
// shuffles: mask 0,2,4,6 stored first, mask 1,3,5,7 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vuzpq_u32(uint32x4_t a,uint32x4_t b)1272*67e74705SXin Li uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
1273*67e74705SXin Li   return vuzpq_u32(a, b);
1274*67e74705SXin Li }
1275*67e74705SXin Li // CHECK-LABEL: define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) #0 {
1276*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1277*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1278*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1279*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1280*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1281*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1282*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1283*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1284*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1285*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1286*67e74705SXin Li // CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
1287*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1288*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1289*67e74705SXin Li // CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
1290*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1291*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1292*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1293*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1294*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1295*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
1296*67e74705SXin Li // CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
1297*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1298*67e74705SXin Li // CHECK:   ret %struct.float32x4x2_t [[TMP12]]
// Codegen test for vuzpq_f32: passes a and b straight through and returns the
// float32x4x2_t result. The FileCheck patterns above expect two <4 x float>
// shuffles: mask 0,2,4,6 stored first, mask 1,3,5,7 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vuzpq_f32(float32x4_t a,float32x4_t b)1299*67e74705SXin Li float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
1300*67e74705SXin Li   return vuzpq_f32(a, b);
1301*67e74705SXin Li }
1302*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
1303*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1304*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1305*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1306*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1307*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1308*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1309*67e74705SXin Li // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1310*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1311*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1312*67e74705SXin Li // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1313*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1314*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1315*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1316*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1317*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1318*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1319*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1320*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1321*67e74705SXin Li // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Codegen test for vuzpq_p8: passes a and b straight through and returns the
// poly8x16x2_t result. The FileCheck patterns above expect two <16 x i8>
// shuffles of %a and %b: mask 0,2,...,30 stored first, mask 1,3,...,31 second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vuzpq_p8(poly8x16_t a,poly8x16_t b)1322*67e74705SXin Li poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
1323*67e74705SXin Li   return vuzpq_p8(a, b);
1324*67e74705SXin Li }
1325*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
1326*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1327*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1328*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1329*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1330*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1331*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1332*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1333*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1334*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1335*67e74705SXin Li // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1336*67e74705SXin Li // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1337*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1338*67e74705SXin Li // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1339*67e74705SXin Li // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
1340*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1341*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1342*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1343*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1344*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1345*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
1346*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1347*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1348*67e74705SXin Li // CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
// Codegen test for vuzpq_p16: passes a and b straight through and returns the
// poly16x8x2_t result. The FileCheck patterns above expect two <8 x i16>
// shuffles: mask 0,2,...,14 stored first, mask 1,3,...,15 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vuzpq_p16(poly16x8_t a,poly16x8_t b)1349*67e74705SXin Li poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
1350*67e74705SXin Li   return vuzpq_p16(a, b);
1351*67e74705SXin Li }
1352*67e74705SXin Li 
1353*67e74705SXin Li // CHECK-LABEL: define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) #0 {
1354*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1355*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1356*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1357*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1358*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1359*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1360*67e74705SXin Li // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1361*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1362*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1363*67e74705SXin Li // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1364*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1365*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1366*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1367*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1368*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1369*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1370*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1371*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1372*67e74705SXin Li // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Codegen test for vzip_s8: passes a and b straight through and returns the
// int8x8x2_t result. The FileCheck patterns above expect two <8 x i8> shuffles
// of %a and %b: mask 0,8,1,9,2,10,3,11 stored first, then mask
// 4,12,5,13,6,14,7,15 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vzip_s8(int8x8_t a,int8x8_t b)1373*67e74705SXin Li int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
1374*67e74705SXin Li   return vzip_s8(a, b);
1375*67e74705SXin Li }
1376*67e74705SXin Li 
1377*67e74705SXin Li // CHECK-LABEL: define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) #0 {
1378*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1379*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1380*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1381*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1382*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1383*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1384*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1385*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1386*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1387*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1388*67e74705SXin Li // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1389*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1390*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1391*67e74705SXin Li // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
1392*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1393*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1394*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1395*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1396*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1397*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
1398*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1399*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1400*67e74705SXin Li // CHECK:   ret %struct.int16x4x2_t [[TMP12]]
// Codegen test for vzip_s16: passes a and b straight through and returns the
// int16x4x2_t result. The FileCheck patterns above expect two <4 x i16>
// shuffles: mask 0,4,1,5 stored first, mask 2,6,3,7 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vzip_s16(int16x4_t a,int16x4_t b)1401*67e74705SXin Li int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
1402*67e74705SXin Li   return vzip_s16(a, b);
1403*67e74705SXin Li }
1404*67e74705SXin Li // CHECK-LABEL: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) #0 {
1405*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1406*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1407*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1408*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1409*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1410*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1411*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1412*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1413*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1414*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1415*67e74705SXin Li // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1416*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1417*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1418*67e74705SXin Li // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
1419*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1420*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1421*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1422*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1423*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1424*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
1425*67e74705SXin Li // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1426*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1427*67e74705SXin Li // CHECK:   ret %struct.int32x2x2_t [[TMP12]]
// Codegen test for vzip_s32: passes a and b straight through and returns the
// int32x2x2_t result. The FileCheck patterns above expect two <2 x i32>
// shuffles: mask 0,2 stored first, mask 1,3 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vzip_s32(int32x2_t a,int32x2_t b)1428*67e74705SXin Li int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
1429*67e74705SXin Li   return vzip_s32(a, b);
1430*67e74705SXin Li }
1431*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) #0 {
1432*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1433*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1434*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1435*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1436*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1437*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1438*67e74705SXin Li // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1439*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1440*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1441*67e74705SXin Li // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1442*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1443*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1444*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1445*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1446*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1447*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1448*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1449*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1450*67e74705SXin Li // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Codegen test for vzip_u8: passes a and b straight through and returns the
// uint8x8x2_t result. The FileCheck patterns above expect two <8 x i8> shuffles
// of %a and %b: mask 0,8,1,9,2,10,3,11 stored first, then mask
// 4,12,5,13,6,14,7,15 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vzip_u8(uint8x8_t a,uint8x8_t b)1451*67e74705SXin Li uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
1452*67e74705SXin Li   return vzip_u8(a, b);
1453*67e74705SXin Li }
1454*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) #0 {
1455*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1456*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1457*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1458*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1459*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1460*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1461*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1462*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1463*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1464*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1465*67e74705SXin Li // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1466*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1467*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1468*67e74705SXin Li // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
1469*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1470*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1471*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1472*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1473*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1474*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
1475*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1476*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1477*67e74705SXin Li // CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
// Codegen test for vzip_u16: passes a and b straight through and returns the
// uint16x4x2_t result. The FileCheck patterns above expect two <4 x i16>
// shuffles: mask 0,4,1,5 stored first, mask 2,6,3,7 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vzip_u16(uint16x4_t a,uint16x4_t b)1478*67e74705SXin Li uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
1479*67e74705SXin Li   return vzip_u16(a, b);
1480*67e74705SXin Li }
1481*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) #0 {
1482*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1483*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1484*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1485*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1486*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1487*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1488*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1489*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1490*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1491*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1492*67e74705SXin Li // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1493*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1494*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1495*67e74705SXin Li // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
1496*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1497*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1498*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1499*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1500*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1501*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
1502*67e74705SXin Li // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1503*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1504*67e74705SXin Li // CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
// Codegen test for vzip_u32: passes a and b straight through and returns the
// uint32x2x2_t result. The FileCheck patterns above expect two <2 x i32>
// shuffles: mask 0,2 stored first, mask 1,3 stored second.
// NOTE(review): the expected IR was presumably generated from this exact body;
// restructuring it would likely require regenerating those patterns.
test_vzip_u32(uint32x2_t a,uint32x2_t b)1505*67e74705SXin Li uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
1506*67e74705SXin Li   return vzip_u32(a, b);
1507*67e74705SXin Li }
1508*67e74705SXin Li // CHECK-LABEL: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) #0 {
1509*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1510*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1511*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1512*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1513*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1514*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1515*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1516*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1517*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1518*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
1519*67e74705SXin Li // CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
1520*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1521*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
1522*67e74705SXin Li // CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
1523*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1524*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1525*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1526*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1527*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1528*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
1529*67e74705SXin Li // CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
1530*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1531*67e74705SXin Li // CHECK:   ret %struct.float32x2x2_t [[TMP12]]
// Codegen test for vzip_f32. The FileCheck directives above expect two
// shufflevectors: mask <0, 2> (a[0], b[0]) stored as the first result vector
// and mask <1, 3> (a[1], b[1]) stored as the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzip_f32(float32x2_t a,float32x2_t b)1532*67e74705SXin Li float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
1533*67e74705SXin Li   return vzip_f32(a, b);
1534*67e74705SXin Li }
1535*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) #0 {
1536*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1537*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1538*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1539*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1540*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1541*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1542*67e74705SXin Li // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1543*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1544*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1545*67e74705SXin Li // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1546*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1547*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1548*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1549*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1550*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1551*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1552*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1553*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1554*67e74705SXin Li // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Codegen test for vzip_p8. The FileCheck directives above expect lanes 0-3
// of a and b interleaved (mask <0, 8, 1, 9, ...>) in the first result vector
// and lanes 4-7 interleaved (mask <4, 12, 5, 13, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzip_p8(poly8x8_t a,poly8x8_t b)1555*67e74705SXin Li poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
1556*67e74705SXin Li   return vzip_p8(a, b);
1557*67e74705SXin Li }
1558*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) #0 {
1559*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1560*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1561*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1562*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1563*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1564*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1565*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1566*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1567*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1568*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1569*67e74705SXin Li // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1570*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1571*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1572*67e74705SXin Li // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
1573*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1574*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1575*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1576*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1577*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1578*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
1579*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1580*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1581*67e74705SXin Li // CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
// Codegen test for vzip_p16. The FileCheck directives above expect lanes 0-1
// of a and b interleaved (mask <0, 4, 1, 5>) in the first result vector and
// lanes 2-3 interleaved (mask <2, 6, 3, 7>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzip_p16(poly16x4_t a,poly16x4_t b)1582*67e74705SXin Li poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
1583*67e74705SXin Li   return vzip_p16(a, b);
1584*67e74705SXin Li }
1585*67e74705SXin Li // CHECK-LABEL: define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
1586*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1587*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1588*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1589*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1590*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1591*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1592*67e74705SXin Li // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1593*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1594*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1595*67e74705SXin Li // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1596*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1597*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1598*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1599*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1600*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1601*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1602*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1603*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1604*67e74705SXin Li // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Codegen test for vzipq_s8. The FileCheck directives above expect lanes 0-7
// of a and b interleaved (mask <0, 16, 1, 17, ...>) in the first result
// vector and lanes 8-15 interleaved (mask <8, 24, 9, 25, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_s8(int8x16_t a,int8x16_t b)1605*67e74705SXin Li int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
1606*67e74705SXin Li   return vzipq_s8(a, b);
1607*67e74705SXin Li }
1608*67e74705SXin Li // CHECK-LABEL: define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
1609*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1610*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1611*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1612*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1613*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1614*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1615*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1616*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1617*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1618*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1619*67e74705SXin Li // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1620*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1621*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1622*67e74705SXin Li // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
1623*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1624*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1625*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1626*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1627*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1628*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
1629*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1630*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1631*67e74705SXin Li // CHECK:   ret %struct.int16x8x2_t [[TMP12]]
// Codegen test for vzipq_s16. The FileCheck directives above expect lanes 0-3
// of a and b interleaved (mask <0, 8, 1, 9, ...>) in the first result vector
// and lanes 4-7 interleaved (mask <4, 12, 5, 13, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_s16(int16x8_t a,int16x8_t b)1632*67e74705SXin Li int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
1633*67e74705SXin Li   return vzipq_s16(a, b);
1634*67e74705SXin Li }
1635*67e74705SXin Li // CHECK-LABEL: define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
1636*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1637*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1638*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1639*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1640*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1641*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1642*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1643*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1644*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1645*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1646*67e74705SXin Li // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1647*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1648*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1649*67e74705SXin Li // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
1650*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1651*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1652*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1653*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1654*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1655*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
1656*67e74705SXin Li // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1657*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1658*67e74705SXin Li // CHECK:   ret %struct.int32x4x2_t [[TMP12]]
// Codegen test for vzipq_s32. The FileCheck directives above expect lanes 0-1
// of a and b interleaved (mask <0, 4, 1, 5>) in the first result vector and
// lanes 2-3 interleaved (mask <2, 6, 3, 7>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_s32(int32x4_t a,int32x4_t b)1659*67e74705SXin Li int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
1660*67e74705SXin Li   return vzipq_s32(a, b);
1661*67e74705SXin Li }
1662*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
1663*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1664*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1665*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1666*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1667*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1668*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1669*67e74705SXin Li // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1670*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1671*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1672*67e74705SXin Li // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1673*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1674*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1675*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1676*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1677*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1678*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1679*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1680*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1681*67e74705SXin Li // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Codegen test for vzipq_u8. The FileCheck directives above expect lanes 0-7
// of a and b interleaved (mask <0, 16, 1, 17, ...>) in the first result
// vector and lanes 8-15 interleaved (mask <8, 24, 9, 25, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_u8(uint8x16_t a,uint8x16_t b)1682*67e74705SXin Li uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
1683*67e74705SXin Li   return vzipq_u8(a, b);
1684*67e74705SXin Li }
1685*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
1686*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1687*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1688*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1689*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1690*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1691*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1692*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1693*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1694*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1695*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1696*67e74705SXin Li // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1697*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1698*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1699*67e74705SXin Li // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
1700*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1701*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1702*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1703*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1704*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1705*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
1706*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1707*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1708*67e74705SXin Li // CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
// Codegen test for vzipq_u16. The FileCheck directives above expect lanes 0-3
// of a and b interleaved (mask <0, 8, 1, 9, ...>) in the first result vector
// and lanes 4-7 interleaved (mask <4, 12, 5, 13, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_u16(uint16x8_t a,uint16x8_t b)1709*67e74705SXin Li uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
1710*67e74705SXin Li   return vzipq_u16(a, b);
1711*67e74705SXin Li }
1712*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
1713*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1714*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1715*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1716*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1717*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1718*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1719*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1720*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1721*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1722*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1723*67e74705SXin Li // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1724*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1725*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1726*67e74705SXin Li // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
1727*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1728*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1729*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1730*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1731*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1732*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
1733*67e74705SXin Li // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1734*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1735*67e74705SXin Li // CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
// Codegen test for vzipq_u32. The FileCheck directives above expect lanes 0-1
// of a and b interleaved (mask <0, 4, 1, 5>) in the first result vector and
// lanes 2-3 interleaved (mask <2, 6, 3, 7>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_u32(uint32x4_t a,uint32x4_t b)1736*67e74705SXin Li uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
1737*67e74705SXin Li   return vzipq_u32(a, b);
1738*67e74705SXin Li }
1739*67e74705SXin Li // CHECK-LABEL: define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) #0 {
1740*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1741*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1742*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1743*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1744*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1745*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1746*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1747*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1748*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1749*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1750*67e74705SXin Li // CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
1751*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1752*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1753*67e74705SXin Li // CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
1754*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1755*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1756*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1757*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1758*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1759*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
1760*67e74705SXin Li // CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
1761*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1762*67e74705SXin Li // CHECK:   ret %struct.float32x4x2_t [[TMP12]]
// Codegen test for vzipq_f32. The FileCheck directives above expect lanes 0-1
// of a and b interleaved (mask <0, 4, 1, 5>) in the first result vector and
// lanes 2-3 interleaved (mask <2, 6, 3, 7>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_f32(float32x4_t a,float32x4_t b)1763*67e74705SXin Li float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
1764*67e74705SXin Li   return vzipq_f32(a, b);
1765*67e74705SXin Li }
1766*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
1767*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1768*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1769*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1770*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1771*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1772*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1773*67e74705SXin Li // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1774*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1775*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1776*67e74705SXin Li // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1777*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1778*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1779*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1780*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1781*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1782*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1783*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1784*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1785*67e74705SXin Li // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Codegen test for vzipq_p8. The FileCheck directives above expect lanes 0-7
// of a and b interleaved (mask <0, 16, 1, 17, ...>) in the first result
// vector and lanes 8-15 interleaved (mask <8, 24, 9, 25, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_p8(poly8x16_t a,poly8x16_t b)1786*67e74705SXin Li poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
1787*67e74705SXin Li   return vzipq_p8(a, b);
1788*67e74705SXin Li }
1789*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
1790*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1791*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1792*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1793*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1794*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1795*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1796*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1797*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1798*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1799*67e74705SXin Li // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1800*67e74705SXin Li // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1801*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1802*67e74705SXin Li // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1803*67e74705SXin Li // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
1804*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1805*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1806*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1807*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1808*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1809*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
1810*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1811*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1812*67e74705SXin Li // CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
// Codegen test for vzipq_p16. The FileCheck directives above expect lanes 0-3
// of a and b interleaved (mask <0, 8, 1, 9, ...>) in the first result vector
// and lanes 4-7 interleaved (mask <4, 12, 5, 13, ...>) in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vzipq_p16(poly16x8_t a,poly16x8_t b)1813*67e74705SXin Li poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
1814*67e74705SXin Li   return vzipq_p16(a, b);
1815*67e74705SXin Li }
1816*67e74705SXin Li 
1817*67e74705SXin Li // CHECK-LABEL: define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
1818*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1819*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1820*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1821*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1822*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1823*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1824*67e74705SXin Li // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1825*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1826*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1827*67e74705SXin Li // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1828*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1829*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1830*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1831*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1832*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1833*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1834*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1835*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1836*67e74705SXin Li // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Codegen test for vtrn_s8. The FileCheck directives above expect the
// even-numbered lanes of a and b interleaved (mask <0, 8, 2, 10, ...>) in the
// first result vector and the odd-numbered lanes (mask <1, 9, 3, 11, ...>)
// in the second.
// Keep the body a bare intrinsic call: the directives match the IR that
// clang emits for exactly this source.
test_vtrn_s8(int8x8_t a,int8x8_t b)1837*67e74705SXin Li int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
1838*67e74705SXin Li   return vtrn_s8(a, b);
1839*67e74705SXin Li }
1840*67e74705SXin Li 
1841*67e74705SXin Li // CHECK-LABEL: define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
1842*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1843*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1844*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1845*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1846*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1847*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1848*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1849*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1850*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1851*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1852*67e74705SXin Li // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1853*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1854*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1855*67e74705SXin Li // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
1856*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1857*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1858*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1859*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1860*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1861*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
1862*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1863*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1864*67e74705SXin Li // CHECK:   ret %struct.int16x4x2_t [[TMP12]]
// Codegen test for vtrn_s16: forwards a and b to the intrinsic and returns
// its int16x4x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 4, 2, 6> (a0, b0, a2, b2) stored to the first <4 x i16> slot of
// the result, and mask <1, 5, 3, 7> (a1, b1, a3, b3) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_s16(int16x4_t a,int16x4_t b)1865*67e74705SXin Li int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
1866*67e74705SXin Li   return vtrn_s16(a, b);
1867*67e74705SXin Li }
1868*67e74705SXin Li // CHECK-LABEL: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
1869*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1870*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1871*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1872*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1873*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1874*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1875*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1876*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1877*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1878*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1879*67e74705SXin Li // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1880*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1881*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1882*67e74705SXin Li // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
1883*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1884*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1885*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1886*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1887*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1888*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
1889*67e74705SXin Li // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1890*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1891*67e74705SXin Li // CHECK:   ret %struct.int32x2x2_t [[TMP12]]
// Codegen test for vtrn_s32: forwards a and b to the intrinsic and returns
// its int32x2x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 2> (a0, b0) stored to the first <2 x i32> slot of the result,
// and mask <1, 3> (a1, b1) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_s32(int32x2_t a,int32x2_t b)1892*67e74705SXin Li int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
1893*67e74705SXin Li   return vtrn_s32(a, b);
1894*67e74705SXin Li }
1895*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
1896*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1897*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1898*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1899*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1900*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1901*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1902*67e74705SXin Li // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1903*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1904*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1905*67e74705SXin Li // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1906*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1907*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1908*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1909*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1910*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1911*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1912*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1913*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1914*67e74705SXin Li // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Codegen test for vtrn_u8: forwards a and b to the intrinsic and returns
// its uint8x8x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors applied directly
// to %a and %b (no bitcasts): mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes
// of a and b interleaved) stored to the first <8 x i8> slot of the result,
// and mask <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_u8(uint8x8_t a,uint8x8_t b)1915*67e74705SXin Li uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
1916*67e74705SXin Li   return vtrn_u8(a, b);
1917*67e74705SXin Li }
1918*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
1919*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1920*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1921*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1922*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1923*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1924*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1925*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1926*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1927*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1928*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1929*67e74705SXin Li // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1930*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1931*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1932*67e74705SXin Li // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
1933*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1934*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1935*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1936*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1937*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1938*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
1939*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1940*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1941*67e74705SXin Li // CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
// Codegen test for vtrn_u16: forwards a and b to the intrinsic and returns
// its uint16x4x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 4, 2, 6> (a0, b0, a2, b2) stored to the first <4 x i16> slot of
// the result, and mask <1, 5, 3, 7> (a1, b1, a3, b3) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_u16(uint16x4_t a,uint16x4_t b)1942*67e74705SXin Li uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
1943*67e74705SXin Li   return vtrn_u16(a, b);
1944*67e74705SXin Li }
1945*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
1946*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1947*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1948*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1949*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1950*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1951*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1952*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1953*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1954*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1955*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1956*67e74705SXin Li // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1957*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1958*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1959*67e74705SXin Li // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
1960*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1961*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1962*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1963*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1964*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1965*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
1966*67e74705SXin Li // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1967*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1968*67e74705SXin Li // CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
// Codegen test for vtrn_u32: forwards a and b to the intrinsic and returns
// its uint32x2x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 2> (a0, b0) stored to the first <2 x i32> slot of the result,
// and mask <1, 3> (a1, b1) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_u32(uint32x2_t a,uint32x2_t b)1969*67e74705SXin Li uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
1970*67e74705SXin Li   return vtrn_u32(a, b);
1971*67e74705SXin Li }
1972*67e74705SXin Li // CHECK-LABEL: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) #0 {
1973*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1974*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1975*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1976*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1977*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1978*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1979*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1980*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1981*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1982*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
1983*67e74705SXin Li // CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
1984*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1985*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
1986*67e74705SXin Li // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]]
1987*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1988*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1989*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1990*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1991*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1992*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
1993*67e74705SXin Li // CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
1994*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1995*67e74705SXin Li // CHECK:   ret %struct.float32x2x2_t [[TMP12]]
// Codegen test for vtrn_f32: forwards a and b to the intrinsic and returns
// its float32x2x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 2> (a0, b0) stored to the first <2 x float> slot of the result,
// and mask <1, 3> (a1, b1) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_f32(float32x2_t a,float32x2_t b)1996*67e74705SXin Li float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
1997*67e74705SXin Li   return vtrn_f32(a, b);
1998*67e74705SXin Li }
1999*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) #0 {
2000*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
2001*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
2002*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
2003*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
2004*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
2005*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2006*67e74705SXin Li // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
2007*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
2008*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2009*67e74705SXin Li // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
2010*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
2011*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
2012*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
2013*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
2014*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
2015*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
2016*67e74705SXin Li // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
2017*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
2018*67e74705SXin Li // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Codegen test for vtrn_p8: forwards a and b to the intrinsic and returns
// its poly8x8x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors applied directly
// to %a and %b (no bitcasts): mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes
// of a and b interleaved) stored to the first <8 x i8> slot of the result,
// and mask <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_p8(poly8x8_t a,poly8x8_t b)2019*67e74705SXin Li poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
2020*67e74705SXin Li   return vtrn_p8(a, b);
2021*67e74705SXin Li }
2022*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) #0 {
2023*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
2024*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
2025*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
2026*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
2027*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2028*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2029*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
2030*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2031*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
2032*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2033*67e74705SXin Li // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
2034*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
2035*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2036*67e74705SXin Li // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
2037*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
2038*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
2039*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
2040*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
2041*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
2042*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
2043*67e74705SXin Li // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
2044*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
2045*67e74705SXin Li // CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
// Codegen test for vtrn_p16: forwards a and b to the intrinsic and returns
// its poly16x4x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 4, 2, 6> (a0, b0, a2, b2) stored to the first <4 x i16> slot of
// the result, and mask <1, 5, 3, 7> (a1, b1, a3, b3) stored to the next slot.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrn_p16(poly16x4_t a,poly16x4_t b)2046*67e74705SXin Li poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
2047*67e74705SXin Li   return vtrn_p16(a, b);
2048*67e74705SXin Li }
2049*67e74705SXin Li // CHECK-LABEL: define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
2050*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
2051*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
2052*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
2053*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
2054*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2055*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2056*67e74705SXin Li // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2057*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2058*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2059*67e74705SXin Li // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2060*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
2061*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
2062*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2063*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
2064*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
2065*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
2066*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2067*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
2068*67e74705SXin Li // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Codegen test for vtrnq_s8 (128-bit variant): forwards a and b to the
// intrinsic and returns its int8x16x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors applied directly
// to %a and %b (no bitcasts): mask <0, 16, 2, 18, ..., 14, 30> (even lanes
// of a and b interleaved) stored to the first <16 x i8> slot of the result,
// and mask <1, 17, 3, 19, ..., 15, 31> (odd lanes) stored to the next slot.
// The struct is expected with 16-byte alignment and a 32-byte memcpy.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrnq_s8(int8x16_t a,int8x16_t b)2069*67e74705SXin Li int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
2070*67e74705SXin Li   return vtrnq_s8(a, b);
2071*67e74705SXin Li }
2072*67e74705SXin Li // CHECK-LABEL: define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
2073*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
2074*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
2075*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
2076*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
2077*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2078*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2079*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2080*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2081*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
2082*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2083*67e74705SXin Li // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2084*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2085*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2086*67e74705SXin Li // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
2087*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
2088*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
2089*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2090*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
2091*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
2092*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
2093*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
2094*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
2095*67e74705SXin Li // CHECK:   ret %struct.int16x8x2_t [[TMP12]]
// Codegen test for vtrnq_s16 (128-bit variant): forwards a and b to the
// intrinsic and returns its int16x8x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes of a and b interleaved)
// stored to the first <8 x i16> slot of the result, and mask
// <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes) stored to the next slot.
// The struct is expected with 16-byte alignment and a 32-byte memcpy.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrnq_s16(int16x8_t a,int16x8_t b)2096*67e74705SXin Li int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
2097*67e74705SXin Li   return vtrnq_s16(a, b);
2098*67e74705SXin Li }
2099*67e74705SXin Li // CHECK-LABEL: define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
2100*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
2101*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
2102*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
2103*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
2104*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2105*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2106*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
2107*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2108*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2109*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2110*67e74705SXin Li // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
2111*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
2112*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2113*67e74705SXin Li // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
2114*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
2115*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
2116*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2117*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
2118*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
2119*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
2120*67e74705SXin Li // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
2121*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
2122*67e74705SXin Li // CHECK:   ret %struct.int32x4x2_t [[TMP12]]
// Codegen test for vtrnq_s32 (128-bit variant): forwards a and b to the
// intrinsic and returns its int32x4x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 4, 2, 6> (a0, b0, a2, b2) stored to the first <4 x i32> slot of
// the result, and mask <1, 5, 3, 7> (a1, b1, a3, b3) stored to the next slot.
// The struct is expected with 16-byte alignment and a 32-byte memcpy.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrnq_s32(int32x4_t a,int32x4_t b)2123*67e74705SXin Li int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
2124*67e74705SXin Li   return vtrnq_s32(a, b);
2125*67e74705SXin Li }
2126*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
2127*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
2128*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
2129*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
2130*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
2131*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2132*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2133*67e74705SXin Li // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2134*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2135*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2136*67e74705SXin Li // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2137*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
2138*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
2139*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2140*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
2141*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
2142*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
2143*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2144*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
2145*67e74705SXin Li // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Codegen test for vtrnq_u8 (128-bit variant): forwards a and b to the
// intrinsic and returns its uint8x16x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors applied directly
// to %a and %b (no bitcasts): mask <0, 16, 2, 18, ..., 14, 30> (even lanes
// of a and b interleaved) stored to the first <16 x i8> slot of the result,
// and mask <1, 17, 3, 19, ..., 15, 31> (odd lanes) stored to the next slot.
// The struct is expected with 16-byte alignment and a 32-byte memcpy.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrnq_u8(uint8x16_t a,uint8x16_t b)2146*67e74705SXin Li uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
2147*67e74705SXin Li   return vtrnq_u8(a, b);
2148*67e74705SXin Li }
2149*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
2150*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
2151*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
2152*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
2153*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
2154*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2155*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2156*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2157*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2158*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
2159*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2160*67e74705SXin Li // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2161*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2162*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2163*67e74705SXin Li // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
2164*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
2165*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
2166*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2167*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
2168*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
2169*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
2170*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
2171*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
2172*67e74705SXin Li // CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
// Codegen test for vtrnq_u16 (128-bit variant): forwards a and b to the
// intrinsic and returns its uint16x8x2_t result unchanged.
// The FileCheck directives above expect two shufflevectors over a and b:
// mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes of a and b interleaved)
// stored to the first <8 x i16> slot of the result, and mask
// <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes) stored to the next slot.
// The struct is expected with 16-byte alignment and a 32-byte memcpy.
// NOTE(review): those directives match specific allocas and stores; adding
// locals here would presumably change the emitted IR - confirm before editing.
test_vtrnq_u16(uint16x8_t a,uint16x8_t b)2173*67e74705SXin Li uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
2174*67e74705SXin Li   return vtrnq_u16(a, b);
2175*67e74705SXin Li }
2176*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
2177*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
2178*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
2179*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
2180*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
2181*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2182*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2183*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
2184*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2185*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2186*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2187*67e74705SXin Li // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
2188*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
2189*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2190*67e74705SXin Li // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
2191*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
2192*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
2193*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2194*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
2195*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
2196*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
2197*67e74705SXin Li // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
2198*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
2199*67e74705SXin Li // CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
// Test wrapper: passes two uint32x4_t vectors straight to vtrnq_u32 and
// returns the resulting uint32x4x2_t unchanged.
// The FileCheck directives preceding this function expect two shufflevector
// instructions over 4 x i32 operands, with lane masks [0, 4, 2, 6] and
// [1, 5, 3, 7], stored into consecutive vector slots of the result.
// Keep the body a single call: the matched IR depends on its exact shape.
test_vtrnq_u32(uint32x4_t a,uint32x4_t b)2200*67e74705SXin Li uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
2201*67e74705SXin Li   return vtrnq_u32(a, b);
2202*67e74705SXin Li }
2203*67e74705SXin Li // CHECK-LABEL: define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) #0 {
2204*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
2205*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
2206*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
2207*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
2208*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
2209*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
2210*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
2211*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
2212*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
2213*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2214*67e74705SXin Li // CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
2215*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
2216*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2217*67e74705SXin Li // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]]
2218*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
2219*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
2220*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2221*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
2222*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
2223*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
2224*67e74705SXin Li // CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
2225*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
2226*67e74705SXin Li // CHECK:   ret %struct.float32x4x2_t [[TMP12]]
// Test wrapper: passes two float32x4_t vectors straight to vtrnq_f32 and
// returns the resulting float32x4x2_t unchanged.
// The FileCheck directives preceding this function expect two shufflevector
// instructions over 4 x float operands, with lane masks [0, 4, 2, 6] and
// [1, 5, 3, 7], stored into consecutive vector slots of the result.
// Keep the body a single call: the matched IR depends on its exact shape.
test_vtrnq_f32(float32x4_t a,float32x4_t b)2227*67e74705SXin Li float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
2228*67e74705SXin Li   return vtrnq_f32(a, b);
2229*67e74705SXin Li }
2230*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
2231*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2232*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2233*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
2234*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
2235*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2236*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2237*67e74705SXin Li // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2238*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2239*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2240*67e74705SXin Li // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2241*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
2242*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
2243*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2244*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
2245*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
2246*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
2247*67e74705SXin Li // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2248*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
2249*67e74705SXin Li // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Test wrapper: passes two poly8x16_t vectors straight to vtrnq_p8 and
// returns the resulting poly8x16x2_t unchanged.
// The FileCheck directives preceding this function expect two shufflevector
// instructions applied directly to the 16 x i8 arguments (no input bitcasts):
// the first mask pairs even lanes [0, 16, 2, 18, ..., 14, 30], the second
// pairs odd lanes [1, 17, 3, 19, ..., 15, 31]. Both results are stored into
// consecutive vector slots of the result.
// Keep the body a single call: the matched IR depends on its exact shape.
test_vtrnq_p8(poly8x16_t a,poly8x16_t b)2250*67e74705SXin Li poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
2251*67e74705SXin Li   return vtrnq_p8(a, b);
2252*67e74705SXin Li }
2253*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
2254*67e74705SXin Li // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2255*67e74705SXin Li // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2256*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
2257*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
2258*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2259*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2260*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2261*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2262*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
2263*67e74705SXin Li // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2264*67e74705SXin Li // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2265*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2266*67e74705SXin Li // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2267*67e74705SXin Li // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
2268*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
2269*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
2270*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2271*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
2272*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
2273*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
2274*67e74705SXin Li // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
2275*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
2276*67e74705SXin Li // CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
// Test wrapper: passes two poly16x8_t vectors straight to vtrnq_p16 and
// returns the resulting poly16x8x2_t unchanged.
// The FileCheck directives preceding this function expect two shufflevector
// instructions over 8 x i16 operands, with lane masks
// [0, 8, 2, 10, 4, 12, 6, 14] and [1, 9, 3, 11, 5, 13, 7, 15], stored into
// consecutive vector slots of the result.
// Keep the body a single call: the matched IR depends on its exact shape.
test_vtrnq_p16(poly16x8_t a,poly16x8_t b)2277*67e74705SXin Li poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
2278*67e74705SXin Li   return vtrnq_p16(a, b);
2279*67e74705SXin Li }
2280