// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

// Test new aarch64 intrinsics and types
// The permute wrappers below are each expected to lower to a single
// shufflevector with a fixed lane mask, followed by a ret of that value.
#include <arm_neon.h>

// vuzp1_s8: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
  return vuzp1_s8(a, b);
}

// vuzp1q_s8: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
  return vuzp1q_s8(a, b);
}

// vuzp1_s16: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
  return vuzp1_s16(a, b);
}

// vuzp1q_s16: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
  return vuzp1q_s16(a, b);
}

// vuzp1_s32: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
  return vuzp1_s32(a, b);
}

// vuzp1q_s32: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
  return vuzp1q_s32(a, b);
}

// vuzp1q_s64: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
  return vuzp1q_s64(a, b);
}

// vuzp1_u8: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp1_u8(a, b);
}

// vuzp1q_u8: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
  return vuzp1q_u8(a, b);
}

// vuzp1_u16: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp1_u16(a, b);
}

// vuzp1q_u16: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
  return vuzp1q_u16(a, b);
}

// vuzp1_u32: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp1_u32(a, b);
}

// vuzp1q_u32: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
  return vuzp1q_u32(a, b);
}

// vuzp1q_u64: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
  return vuzp1q_u64(a, b);
}

// vuzp1_f32: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
  return vuzp1_f32(a, b);
}

// vuzp1q_f32: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
  return vuzp1q_f32(a, b);
}

// vuzp1q_f64: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x double> [[SHUFFLE_I]]
float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
  return vuzp1q_f64(a, b);
}

// vuzp1_p8: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp1_p8(a, b);
}

// vuzp1q_p8: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
  return vuzp1q_p8(a, b);
}

// vuzp1_p16: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp1_p16(a, b);
}

// vuzp1q_p16: one shufflevector keeping the even-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
  return vuzp1q_p16(a, b);
}

// vuzp2_s8: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
  return vuzp2_s8(a, b);
}

// vuzp2q_s8: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
  return vuzp2q_s8(a, b);
}

// vuzp2_s16: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
  return vuzp2_s16(a, b);
}

// vuzp2q_s16: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
  return vuzp2q_s16(a, b);
}

// vuzp2_s32: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
  return vuzp2_s32(a, b);
}

// vuzp2q_s32: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
  return vuzp2q_s32(a, b);
}

// vuzp2q_s64: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
  return vuzp2q_s64(a, b);
}

// vuzp2_u8: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp2_u8(a, b);
}

// vuzp2q_u8: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
  return vuzp2q_u8(a, b);
}

// vuzp2_u16: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp2_u16(a, b);
}

// vuzp2q_u16: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
  return vuzp2q_u16(a, b);
}

// vuzp2_u32: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp2_u32(a, b);
}

// vuzp2q_u32: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
  return vuzp2q_u32(a, b);
}

// vuzp2q_u64: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
  return vuzp2q_u64(a, b);
}

// vuzp2_f32: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
  return vuzp2_f32(a, b);
}

// vuzp2q_f32: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
  return vuzp2q_f32(a, b);
}

// vuzp2q_f64: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x double> [[SHUFFLE_I]]
float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
  return vuzp2q_f64(a, b);
}

// vuzp2_p8: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp2_p8(a, b);
}

// vuzp2q_p8: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
  return vuzp2q_p8(a, b);
}

// vuzp2_p16: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp2_p16(a, b);
}

// vuzp2q_p16: one shufflevector keeping the odd-indexed lanes of a:b.
// CHECK-LABEL: define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
  return vuzp2q_p16(a, b);
}

// vzip1_s8: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
  return vzip1_s8(a, b);
}

// vzip1q_s8: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
  return vzip1q_s8(a, b);
}

// vzip1_s16: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
  return vzip1_s16(a, b);
}

// vzip1q_s16: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
  return vzip1q_s16(a, b);
}

// vzip1_s32: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
  return vzip1_s32(a, b);
}

// vzip1q_s32: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
  return vzip1q_s32(a, b);
}

// vzip1q_s64: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
  return vzip1q_s64(a, b);
}

// vzip1_u8: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
  return vzip1_u8(a, b);
}

// vzip1q_u8: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
  return vzip1q_u8(a, b);
}

// vzip1_u16: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
  return vzip1_u16(a, b);
}

// vzip1q_u16: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
  return vzip1q_u16(a, b);
}

// vzip1_u32: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
  return vzip1_u32(a, b);
}

// vzip1q_u32: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
  return vzip1q_u32(a, b);
}

// vzip1q_u64: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
  return vzip1q_u64(a, b);
}

// vzip1_f32: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
  return vzip1_f32(a, b);
}

// vzip1q_f32: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
  return vzip1q_f32(a, b);
}

// vzip1q_f64: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x double> [[SHUFFLE_I]]
float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
  return vzip1q_f64(a, b);
}

// vzip1_p8: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
  return vzip1_p8(a, b);
}

// vzip1q_p8: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
  return vzip1q_p8(a, b);
}

// vzip1_p16: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
  return vzip1_p16(a, b);
}

// vzip1q_p16: one shufflevector interleaving the low halves of a and b.
// CHECK-LABEL: define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
  return vzip1q_p16(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vzip2_s8; expected IR is one shufflevector producing
// a4, b4, a5, b5, a6, b6, a7, b7.
int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
  return vzip2_s8(a, b);
}
454*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_s8; expected IR is one shufflevector producing
// a8, b8, a9, b9, ..., a15, b15.
int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
  return vzip2q_s8(a, b);
}
461*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vzip2_s16; expected IR is one shufflevector producing
// a2, b2, a3, b3.
int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
  return vzip2_s16(a, b);
}
468*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_s16; expected IR is one shufflevector producing
// a4, b4, a5, b5, a6, b6, a7, b7.
int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
  return vzip2q_s16(a, b);
}
475*67e74705SXin Li
// CHECK-LABEL: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper around vzip2_s32; expected IR is one shufflevector producing
// a1, b1.
int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
  return vzip2_s32(a, b);
}
482*67e74705SXin Li
// CHECK-LABEL: define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_s32; expected IR is one shufflevector producing
// a2, b2, a3, b3.
int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
  return vzip2q_s32(a, b);
}
489*67e74705SXin Li
// CHECK-LABEL: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_s64; expected IR is one shufflevector producing
// a1, b1.
int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
  return vzip2q_s64(a, b);
}
496*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vzip2_u8; expected IR is one shufflevector producing
// a4, b4, a5, b5, a6, b6, a7, b7.
uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
  return vzip2_u8(a, b);
}
503*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_u8; expected IR is one shufflevector producing
// a8, b8, a9, b9, ..., a15, b15.
uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
  return vzip2q_u8(a, b);
}
510*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vzip2_u16; expected IR is one shufflevector producing
// a2, b2, a3, b3.
uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
  return vzip2_u16(a, b);
}
517*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_u16; expected IR is one shufflevector producing
// a4, b4, a5, b5, a6, b6, a7, b7.
uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
  return vzip2q_u16(a, b);
}
524*67e74705SXin Li
// CHECK-LABEL: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper around vzip2_u32; expected IR is one shufflevector producing
// a1, b1.
uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
  return vzip2_u32(a, b);
}
531*67e74705SXin Li
// CHECK-LABEL: define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_u32; expected IR is one shufflevector producing
// a2, b2, a3, b3.
uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
  return vzip2q_u32(a, b);
}
538*67e74705SXin Li
// CHECK-LABEL: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_u64; expected IR is one shufflevector producing
// a1, b1.
uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
  return vzip2q_u64(a, b);
}
545*67e74705SXin Li
// CHECK-LABEL: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper around vzip2_f32; expected IR is one shufflevector producing
// a1, b1.
float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
  return vzip2_f32(a, b);
}
552*67e74705SXin Li
// CHECK-LABEL: define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_f32; expected IR is one shufflevector producing
// a2, b2, a3, b3.
float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
  return vzip2q_f32(a, b);
}
559*67e74705SXin Li
// CHECK-LABEL: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_f64; expected IR is one shufflevector producing
// a1, b1.
float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
  return vzip2q_f64(a, b);
}
566*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vzip2_p8; expected IR is one shufflevector producing
// a4, b4, a5, b5, a6, b6, a7, b7.
poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
  return vzip2_p8(a, b);
}
573*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_p8; expected IR is one shufflevector producing
// a8, b8, a9, b9, ..., a15, b15.
poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
  return vzip2q_p8(a, b);
}
580*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vzip2_p16; expected IR is one shufflevector producing
// a2, b2, a3, b3.
poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
  return vzip2_p16(a, b);
}
587*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vzip2q_p16; expected IR is one shufflevector producing
// a4, b4, a5, b5, a6, b6, a7, b7.
poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
  return vzip2q_p16(a, b);
}
594*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_s8; expected IR is one shufflevector producing
// a0, b0, a2, b2, a4, b4, a6, b6.
int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
  return vtrn1_s8(a, b);
}
601*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_s8; expected IR is one shufflevector producing
// a0, b0, a2, b2, ..., a14, b14.
int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
  return vtrn1q_s8(a, b);
}
608*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_s16; expected IR is one shufflevector producing
// a0, b0, a2, b2.
int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
  return vtrn1_s16(a, b);
}
615*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_s16; expected IR is one shufflevector producing
// a0, b0, a2, b2, a4, b4, a6, b6.
int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
  return vtrn1q_s16(a, b);
}
622*67e74705SXin Li
// CHECK-LABEL: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_s32; expected IR is one shufflevector producing
// a0, b0.
int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
  return vtrn1_s32(a, b);
}
629*67e74705SXin Li
// CHECK-LABEL: define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_s32; expected IR is one shufflevector producing
// a0, b0, a2, b2.
int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
  return vtrn1q_s32(a, b);
}
636*67e74705SXin Li
// CHECK-LABEL: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_s64; expected IR is one shufflevector producing
// a0, b0.
int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
  return vtrn1q_s64(a, b);
}
643*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_u8; expected IR is one shufflevector producing
// a0, b0, a2, b2, a4, b4, a6, b6.
uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn1_u8(a, b);
}
650*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_u8; expected IR is one shufflevector producing
// a0, b0, a2, b2, ..., a14, b14.
uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
  return vtrn1q_u8(a, b);
}
657*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_u16; expected IR is one shufflevector producing
// a0, b0, a2, b2.
uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn1_u16(a, b);
}
664*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_u16; expected IR is one shufflevector producing
// a0, b0, a2, b2, a4, b4, a6, b6.
uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
  return vtrn1q_u16(a, b);
}
671*67e74705SXin Li
// CHECK-LABEL: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_u32; expected IR is one shufflevector producing
// a0, b0.
uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn1_u32(a, b);
}
678*67e74705SXin Li
// CHECK-LABEL: define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_u32; expected IR is one shufflevector producing
// a0, b0, a2, b2.
uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
  return vtrn1q_u32(a, b);
}
685*67e74705SXin Li
// CHECK-LABEL: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_u64; expected IR is one shufflevector producing
// a0, b0.
uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
  return vtrn1q_u64(a, b);
}
692*67e74705SXin Li
// CHECK-LABEL: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_f32; expected IR is one shufflevector producing
// a0, b0.
float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
  return vtrn1_f32(a, b);
}
699*67e74705SXin Li
// CHECK-LABEL: define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_f32; expected IR is one shufflevector producing
// a0, b0, a2, b2.
float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
  return vtrn1q_f32(a, b);
}
706*67e74705SXin Li
// CHECK-LABEL: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_f64; expected IR is one shufflevector producing
// a0, b0.
float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
  return vtrn1q_f64(a, b);
}
713*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_p8; expected IR is one shufflevector producing
// a0, b0, a2, b2, a4, b4, a6, b6.
poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn1_p8(a, b);
}
720*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_p8; expected IR is one shufflevector producing
// a0, b0, a2, b2, ..., a14, b14.
poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
  return vtrn1q_p8(a, b);
}
727*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn1_p16; expected IR is one shufflevector producing
// a0, b0, a2, b2.
poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
  return vtrn1_p16(a, b);
}
734*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn1q_p16; expected IR is one shufflevector producing
// a0, b0, a2, b2, a4, b4, a6, b6.
poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
  return vtrn1q_p16(a, b);
}
741*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_s8; expected IR is one shufflevector producing
// a1, b1, a3, b3, a5, b5, a7, b7.
int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
  return vtrn2_s8(a, b);
}
748*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_s8; expected IR is one shufflevector producing
// a1, b1, a3, b3, ..., a15, b15.
int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
  return vtrn2q_s8(a, b);
}
755*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_s16; expected IR is one shufflevector producing
// a1, b1, a3, b3.
int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
  return vtrn2_s16(a, b);
}
762*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_s16; expected IR is one shufflevector producing
// a1, b1, a3, b3, a5, b5, a7, b7.
int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
  return vtrn2q_s16(a, b);
}
769*67e74705SXin Li
// CHECK-LABEL: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_s32; expected IR is one shufflevector producing
// a1, b1.
int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
  return vtrn2_s32(a, b);
}
776*67e74705SXin Li
// CHECK-LABEL: define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_s32; expected IR is one shufflevector producing
// a1, b1, a3, b3.
int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
  return vtrn2q_s32(a, b);
}
783*67e74705SXin Li
// CHECK-LABEL: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_s64; expected IR is one shufflevector producing
// a1, b1.
int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
  return vtrn2q_s64(a, b);
}
790*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_u8; expected IR is one shufflevector producing
// a1, b1, a3, b3, a5, b5, a7, b7.
uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn2_u8(a, b);
}
797*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_u8; expected IR is one shufflevector producing
// a1, b1, a3, b3, ..., a15, b15.
uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
  return vtrn2q_u8(a, b);
}
804*67e74705SXin Li
// CHECK-LABEL: define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_u16; expected IR is one shufflevector producing
// a1, b1, a3, b3.
uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn2_u16(a, b);
}
811*67e74705SXin Li
// CHECK-LABEL: define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_u16; expected IR is one shufflevector producing
// a1, b1, a3, b3, a5, b5, a7, b7.
uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
  return vtrn2q_u16(a, b);
}
818*67e74705SXin Li
// CHECK-LABEL: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_u32; expected IR is one shufflevector producing
// a1, b1.
uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn2_u32(a, b);
}
825*67e74705SXin Li
// CHECK-LABEL: define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_u32; expected IR is one shufflevector producing
// a1, b1, a3, b3.
uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
  return vtrn2q_u32(a, b);
}
832*67e74705SXin Li
// CHECK-LABEL: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_u64; expected IR is one shufflevector producing
// a1, b1.
uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
  return vtrn2q_u64(a, b);
}
839*67e74705SXin Li
// CHECK-LABEL: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_f32; expected IR is one shufflevector producing
// a1, b1.
float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
  return vtrn2_f32(a, b);
}
846*67e74705SXin Li
// CHECK-LABEL: define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_f32; expected IR is one shufflevector producing
// a1, b1, a3, b3.
float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
  return vtrn2q_f32(a, b);
}
853*67e74705SXin Li
// CHECK-LABEL: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_f64; expected IR is one shufflevector producing
// a1, b1.
float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
  return vtrn2q_f64(a, b);
}
860*67e74705SXin Li
// CHECK-LABEL: define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn2_p8; expected IR is one shufflevector producing
// a1, b1, a3, b3, a5, b5, a7, b7.
poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn2_p8(a, b);
}
867*67e74705SXin Li
// CHECK-LABEL: define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper around vtrn2q_p8; expected IR is one shufflevector producing
// a1, b1, a3, b3, ..., a15, b15.
poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
  return vtrn2q_p8(a, b);
}
874*67e74705SXin Li
875*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
876*67e74705SXin Li // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
877*67e74705SXin Li // CHECK: ret <4 x i16> [[SHUFFLE_I]]
// Test wrapper for vtrn2_p16. The expected IR listed above is a single
// shufflevector of %a and %b with mask <1, 5, 3, 7>.
test_vtrn2_p16(poly16x4_t a,poly16x4_t b)878*67e74705SXin Li poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
879*67e74705SXin Li return vtrn2_p16(a, b);
880*67e74705SXin Li }
881*67e74705SXin Li
882*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
883*67e74705SXin Li // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
884*67e74705SXin Li // CHECK: ret <8 x i16> [[SHUFFLE_I]]
// Test wrapper for vtrn2q_p16. The expected IR listed above is a single
// shufflevector of %a and %b with mask <1, 9, 3, 11, 5, 13, 7, 15>.
test_vtrn2q_p16(poly16x8_t a,poly16x8_t b)885*67e74705SXin Li poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
886*67e74705SXin Li return vtrn2q_p16(a, b);
887*67e74705SXin Li }
888*67e74705SXin Li
889*67e74705SXin Li // CHECK-LABEL: define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) #0 {
890*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
891*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
892*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
893*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
894*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
895*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
896*67e74705SXin Li // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
897*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
898*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
899*67e74705SXin Li // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
900*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
901*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
902*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
903*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
904*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
905*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
906*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
907*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
908*67e74705SXin Li // CHECK: ret %struct.int8x8x2_t [[TMP8]]
// Test wrapper for vuzp_s8, which returns a two-vector struct (int8x8x2_t).
// The expected IR listed above has two shufflevectors on %a and %b: the
// even-index mask <0, 2, ..., 14> and the odd-index mask <1, 3, ..., 15>.
// They are stored to adjacent <8 x i8> slots, then a 16-byte memcpy copies the struct.
test_vuzp_s8(int8x8_t a,int8x8_t b)909*67e74705SXin Li int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
910*67e74705SXin Li return vuzp_s8(a, b);
911*67e74705SXin Li }
912*67e74705SXin Li
913*67e74705SXin Li // CHECK-LABEL: define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) #0 {
914*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
915*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
916*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
917*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
918*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
919*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
920*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
921*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
922*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
923*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
924*67e74705SXin Li // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
925*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
926*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
927*67e74705SXin Li // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
928*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
929*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
930*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
931*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
932*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
933*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
934*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
935*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
936*67e74705SXin Li // CHECK: ret %struct.int16x4x2_t [[TMP12]]
// Test wrapper for vuzp_s16, which returns int16x4x2_t.
// The expected IR listed above bitcasts each input to <8 x i8> and back to
// <4 x i16>, then emits two shufflevectors with masks <0, 2, 4, 6> and
// <1, 3, 5, 7>, stored to adjacent slots before a 16-byte memcpy.
test_vuzp_s16(int16x4_t a,int16x4_t b)937*67e74705SXin Li int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
938*67e74705SXin Li return vuzp_s16(a, b);
939*67e74705SXin Li }
940*67e74705SXin Li // CHECK-LABEL: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) #0 {
941*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
942*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
943*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
944*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
945*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
946*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
947*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
948*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
949*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
950*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
951*67e74705SXin Li // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
952*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
953*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
954*67e74705SXin Li // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
955*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
956*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
957*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
958*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
959*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
960*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
961*67e74705SXin Li // CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
962*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
963*67e74705SXin Li // CHECK: ret %struct.int32x2x2_t [[TMP12]]
// Test wrapper for vuzp_s32, which returns int32x2x2_t.
// The expected IR listed above bitcasts each input to <8 x i8> and back to
// <2 x i32>, then emits two shufflevectors with masks <0, 2> and <1, 3>,
// stored to adjacent slots before a 16-byte memcpy.
test_vuzp_s32(int32x2_t a,int32x2_t b)964*67e74705SXin Li int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
965*67e74705SXin Li return vuzp_s32(a, b);
966*67e74705SXin Li }
967*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) #0 {
968*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
969*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
970*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
971*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
972*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
973*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
974*67e74705SXin Li // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
975*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
976*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
977*67e74705SXin Li // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
978*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
979*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
980*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
981*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
982*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
983*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
984*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
985*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
986*67e74705SXin Li // CHECK: ret %struct.uint8x8x2_t [[TMP8]]
// Test wrapper for vuzp_u8, which returns uint8x8x2_t.
// The expected IR listed above has two shufflevectors on %a and %b with the
// even-index mask <0, 2, ..., 14> and the odd-index mask <1, 3, ..., 15>,
// stored to adjacent <8 x i8> slots before a 16-byte memcpy.
test_vuzp_u8(uint8x8_t a,uint8x8_t b)987*67e74705SXin Li uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
988*67e74705SXin Li return vuzp_u8(a, b);
989*67e74705SXin Li }
990*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) #0 {
991*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
992*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
993*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
994*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
995*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
996*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
997*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
998*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
999*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1000*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1001*67e74705SXin Li // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
1002*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1003*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1004*67e74705SXin Li // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
1005*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1006*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1007*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1008*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1009*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1010*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
1011*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1012*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1013*67e74705SXin Li // CHECK: ret %struct.uint16x4x2_t [[TMP12]]
// Test wrapper for vuzp_u16, which returns uint16x4x2_t.
// The expected IR listed above bitcasts each input to <8 x i8> and back to
// <4 x i16>, then emits two shufflevectors with masks <0, 2, 4, 6> and
// <1, 3, 5, 7>, stored to adjacent slots before a 16-byte memcpy.
test_vuzp_u16(uint16x4_t a,uint16x4_t b)1014*67e74705SXin Li uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
1015*67e74705SXin Li return vuzp_u16(a, b);
1016*67e74705SXin Li }
1017*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) #0 {
1018*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1019*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1020*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1021*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1022*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1023*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1024*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1025*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1026*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1027*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1028*67e74705SXin Li // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
1029*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1030*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1031*67e74705SXin Li // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
1032*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1033*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1034*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1035*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1036*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1037*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
1038*67e74705SXin Li // CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1039*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1040*67e74705SXin Li // CHECK: ret %struct.uint32x2x2_t [[TMP12]]
// Test wrapper for vuzp_u32, which returns uint32x2x2_t.
// The expected IR listed above bitcasts each input to <8 x i8> and back to
// <2 x i32>, then emits two shufflevectors with masks <0, 2> and <1, 3>,
// stored to adjacent slots before a 16-byte memcpy.
test_vuzp_u32(uint32x2_t a,uint32x2_t b)1041*67e74705SXin Li uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
1042*67e74705SXin Li return vuzp_u32(a, b);
1043*67e74705SXin Li }
1044*67e74705SXin Li // CHECK-LABEL: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) #0 {
1045*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1046*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1047*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1048*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1049*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1050*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1051*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1052*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1053*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1054*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
1055*67e74705SXin Li // CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
1056*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1057*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
1058*67e74705SXin Li // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
1059*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1060*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1061*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1062*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1063*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1064*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
1065*67e74705SXin Li // CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
1066*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1067*67e74705SXin Li // CHECK: ret %struct.float32x2x2_t [[TMP12]]
// Test wrapper for vuzp_f32, which returns float32x2x2_t.
// The expected IR listed above bitcasts each input to <8 x i8> and back to
// <2 x float>, then emits two shufflevectors with masks <0, 2> and <1, 3>,
// stored to adjacent slots before a 16-byte memcpy.
test_vuzp_f32(float32x2_t a,float32x2_t b)1068*67e74705SXin Li float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
1069*67e74705SXin Li return vuzp_f32(a, b);
1070*67e74705SXin Li }
1071*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) #0 {
1072*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1073*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1074*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1075*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1076*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1077*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1078*67e74705SXin Li // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
1079*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1080*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1081*67e74705SXin Li // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
1082*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1083*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1084*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1085*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1086*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1087*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1088*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1089*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1090*67e74705SXin Li // CHECK: ret %struct.poly8x8x2_t [[TMP8]]
// Test wrapper for vuzp_p8, which returns poly8x8x2_t.
// The expected IR listed above has two shufflevectors on %a and %b with the
// even-index mask <0, 2, ..., 14> and the odd-index mask <1, 3, ..., 15>,
// stored to adjacent <8 x i8> slots before a 16-byte memcpy.
test_vuzp_p8(poly8x8_t a,poly8x8_t b)1091*67e74705SXin Li poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
1092*67e74705SXin Li return vuzp_p8(a, b);
1093*67e74705SXin Li }
1094*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) #0 {
1095*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1096*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1097*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1098*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1099*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1100*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1101*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1102*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1103*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1104*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1105*67e74705SXin Li // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
1106*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1107*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1108*67e74705SXin Li // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
1109*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1110*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1111*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1112*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1113*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1114*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
1115*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1116*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1117*67e74705SXin Li // CHECK: ret %struct.poly16x4x2_t [[TMP12]]
// Test wrapper for vuzp_p16, which returns poly16x4x2_t.
// The expected IR listed above bitcasts each input to <8 x i8> and back to
// <4 x i16>, then emits two shufflevectors with masks <0, 2, 4, 6> and
// <1, 3, 5, 7>, stored to adjacent slots before a 16-byte memcpy.
test_vuzp_p16(poly16x4_t a,poly16x4_t b)1118*67e74705SXin Li poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
1119*67e74705SXin Li return vuzp_p16(a, b);
1120*67e74705SXin Li }
1121*67e74705SXin Li // CHECK-LABEL: define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
1122*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1123*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1124*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1125*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1126*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1127*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1128*67e74705SXin Li // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1129*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1130*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1131*67e74705SXin Li // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1132*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1133*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1134*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1135*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1136*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1137*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1138*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1139*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1140*67e74705SXin Li // CHECK: ret %struct.int8x16x2_t [[TMP8]]
// Test wrapper for vuzpq_s8, which returns int8x16x2_t.
// The expected IR listed above has two 16-lane shufflevectors on %a and %b:
// the even-index mask <0, 2, ..., 30> and the odd-index mask <1, 3, ..., 31>.
// They are stored to adjacent <16 x i8> slots; the struct copy is a 32-byte
// memcpy with 16-byte alignment.
test_vuzpq_s8(int8x16_t a,int8x16_t b)1141*67e74705SXin Li int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
1142*67e74705SXin Li return vuzpq_s8(a, b);
1143*67e74705SXin Li }
1144*67e74705SXin Li // CHECK-LABEL: define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
1145*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1146*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1147*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1148*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1149*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1150*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1151*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1152*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1153*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1154*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1155*67e74705SXin Li // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1156*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1157*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1158*67e74705SXin Li // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
1159*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1160*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1161*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1162*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1163*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1164*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
1165*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1166*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1167*67e74705SXin Li // CHECK: ret %struct.int16x8x2_t [[TMP12]]
// Test wrapper for vuzpq_s16, which returns int16x8x2_t.
// The expected IR listed above bitcasts each input to <16 x i8> and back to
// <8 x i16>, then emits two shufflevectors with masks <0, 2, ..., 14> and
// <1, 3, ..., 15>, stored to adjacent slots before a 32-byte memcpy.
test_vuzpq_s16(int16x8_t a,int16x8_t b)1168*67e74705SXin Li int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
1169*67e74705SXin Li return vuzpq_s16(a, b);
1170*67e74705SXin Li }
1171*67e74705SXin Li // CHECK-LABEL: define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
1172*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1173*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1174*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1175*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1176*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1177*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1178*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1179*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1180*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1181*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1182*67e74705SXin Li // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1183*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1184*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1185*67e74705SXin Li // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
1186*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1187*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1188*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1189*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1190*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1191*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
1192*67e74705SXin Li // CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1193*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1194*67e74705SXin Li // CHECK: ret %struct.int32x4x2_t [[TMP12]]
// Test wrapper for vuzpq_s32, which returns int32x4x2_t.
// The expected IR listed above bitcasts each input to <16 x i8> and back to
// <4 x i32>, then emits two shufflevectors with masks <0, 2, 4, 6> and
// <1, 3, 5, 7>, stored to adjacent slots before a 32-byte memcpy.
test_vuzpq_s32(int32x4_t a,int32x4_t b)1195*67e74705SXin Li int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
1196*67e74705SXin Li return vuzpq_s32(a, b);
1197*67e74705SXin Li }
1198*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
1199*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1200*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1201*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1202*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1203*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1204*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1205*67e74705SXin Li // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1206*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1207*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1208*67e74705SXin Li // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1209*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1210*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1211*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1212*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1213*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1214*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1215*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1216*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1217*67e74705SXin Li // CHECK: ret %struct.uint8x16x2_t [[TMP8]]
// Exercises vuzpq_u8: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector holds
// lanes 0, 2, ..., 30 of a followed by b, and the second holds lanes
// 1, 3, ..., 31.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vuzpq_u8(uint8x16_t a,uint8x16_t b)1218*67e74705SXin Li uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
1219*67e74705SXin Li return vuzpq_u8(a, b);
1220*67e74705SXin Li }
1221*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
1222*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1223*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1224*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1225*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1226*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1227*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1228*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1229*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1230*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1231*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1232*67e74705SXin Li // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1233*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1234*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1235*67e74705SXin Li // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
1236*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1237*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1238*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1239*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1240*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1241*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
1242*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1243*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1244*67e74705SXin Li // CHECK: ret %struct.uint16x8x2_t [[TMP12]]
// Exercises vuzpq_u16: passes a and b straight to the intrinsic and returns
// its two-vector result. Per the expected IR above, the first result vector
// holds lanes 0, 2, ..., 14 of a followed by b, and the second holds lanes
// 1, 3, ..., 15.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vuzpq_u16(uint16x8_t a,uint16x8_t b)1245*67e74705SXin Li uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
1246*67e74705SXin Li return vuzpq_u16(a, b);
1247*67e74705SXin Li }
1248*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
1249*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1250*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1251*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1252*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1253*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1254*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1255*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1256*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1257*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1258*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1259*67e74705SXin Li // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1260*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1261*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1262*67e74705SXin Li // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
1263*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1264*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1265*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1266*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1267*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1268*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
1269*67e74705SXin Li // CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1270*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1271*67e74705SXin Li // CHECK: ret %struct.uint32x4x2_t [[TMP12]]
// Exercises vuzpq_u32: passes a and b straight to the intrinsic and returns
// its two-vector result. Per the expected IR above, the first result vector
// holds lanes 0, 2, 4, 6 of a followed by b, and the second holds lanes
// 1, 3, 5, 7.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vuzpq_u32(uint32x4_t a,uint32x4_t b)1272*67e74705SXin Li uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
1273*67e74705SXin Li return vuzpq_u32(a, b);
1274*67e74705SXin Li }
1275*67e74705SXin Li // CHECK-LABEL: define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) #0 {
1276*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1277*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1278*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1279*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1280*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1281*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1282*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1283*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1284*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1285*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1286*67e74705SXin Li // CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
1287*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1288*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1289*67e74705SXin Li // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
1290*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1291*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1292*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1293*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1294*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1295*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
1296*67e74705SXin Li // CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
1297*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1298*67e74705SXin Li // CHECK: ret %struct.float32x4x2_t [[TMP12]]
// Exercises vuzpq_f32: passes a and b straight to the intrinsic and returns
// its two-vector result. Per the expected IR above, the first result vector
// holds lanes 0, 2, 4, 6 of a followed by b, and the second holds lanes
// 1, 3, 5, 7.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vuzpq_f32(float32x4_t a,float32x4_t b)1299*67e74705SXin Li float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
1300*67e74705SXin Li return vuzpq_f32(a, b);
1301*67e74705SXin Li }
1302*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
1303*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1304*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1305*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1306*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1307*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1308*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1309*67e74705SXin Li // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1310*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1311*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1312*67e74705SXin Li // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1313*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1314*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1315*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1316*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1317*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1318*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1319*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1320*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1321*67e74705SXin Li // CHECK: ret %struct.poly8x16x2_t [[TMP8]]
// Exercises vuzpq_p8: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector holds
// lanes 0, 2, ..., 30 of a followed by b, and the second holds lanes
// 1, 3, ..., 31.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vuzpq_p8(poly8x16_t a,poly8x16_t b)1322*67e74705SXin Li poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
1323*67e74705SXin Li return vuzpq_p8(a, b);
1324*67e74705SXin Li }
1325*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
1326*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1327*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1328*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1329*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1330*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1331*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1332*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1333*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1334*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1335*67e74705SXin Li // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1336*67e74705SXin Li // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1337*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1338*67e74705SXin Li // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1339*67e74705SXin Li // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
1340*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1341*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1342*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1343*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1344*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1345*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
1346*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1347*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1348*67e74705SXin Li // CHECK: ret %struct.poly16x8x2_t [[TMP12]]
// Exercises vuzpq_p16: passes a and b straight to the intrinsic and returns
// its two-vector result. Per the expected IR above, the first result vector
// holds lanes 0, 2, ..., 14 of a followed by b, and the second holds lanes
// 1, 3, ..., 15.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vuzpq_p16(poly16x8_t a,poly16x8_t b)1349*67e74705SXin Li poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
1350*67e74705SXin Li return vuzpq_p16(a, b);
1351*67e74705SXin Li }
1352*67e74705SXin Li
1353*67e74705SXin Li // CHECK-LABEL: define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) #0 {
1354*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1355*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1356*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1357*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1358*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1359*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1360*67e74705SXin Li // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1361*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1362*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1363*67e74705SXin Li // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1364*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1365*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1366*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1367*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1368*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1369*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1370*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1371*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1372*67e74705SXin Li // CHECK: ret %struct.int8x8x2_t [[TMP8]]
// Exercises vzip_s8: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector
// interleaves lanes 0-3 of a and b (a0, b0, a1, b1, ...), and the second
// interleaves lanes 4-7.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_s8(int8x8_t a,int8x8_t b)1373*67e74705SXin Li int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
1374*67e74705SXin Li return vzip_s8(a, b);
1375*67e74705SXin Li }
1376*67e74705SXin Li
1377*67e74705SXin Li // CHECK-LABEL: define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) #0 {
1378*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1379*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1380*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1381*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1382*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1383*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1384*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1385*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1386*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1387*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1388*67e74705SXin Li // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1389*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1390*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1391*67e74705SXin Li // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
1392*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1393*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1394*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1395*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1396*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1397*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
1398*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1399*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1400*67e74705SXin Li // CHECK: ret %struct.int16x4x2_t [[TMP12]]
// Exercises vzip_s16: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector is
// (a0, b0, a1, b1) and the second is (a2, b2, a3, b3).
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_s16(int16x4_t a,int16x4_t b)1401*67e74705SXin Li int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
1402*67e74705SXin Li return vzip_s16(a, b);
1403*67e74705SXin Li }
1404*67e74705SXin Li // CHECK-LABEL: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) #0 {
1405*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1406*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1407*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1408*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1409*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1410*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1411*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1412*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1413*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1414*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1415*67e74705SXin Li // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1416*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1417*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1418*67e74705SXin Li // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
1419*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1420*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1421*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1422*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1423*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1424*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
1425*67e74705SXin Li // CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1426*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1427*67e74705SXin Li // CHECK: ret %struct.int32x2x2_t [[TMP12]]
// Exercises vzip_s32: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector is
// (a0, b0) and the second is (a1, b1).
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_s32(int32x2_t a,int32x2_t b)1428*67e74705SXin Li int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
1429*67e74705SXin Li return vzip_s32(a, b);
1430*67e74705SXin Li }
1431*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) #0 {
1432*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1433*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1434*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1435*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1436*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1437*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1438*67e74705SXin Li // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1439*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1440*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1441*67e74705SXin Li // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1442*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1443*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1444*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1445*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1446*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1447*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1448*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1449*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1450*67e74705SXin Li // CHECK: ret %struct.uint8x8x2_t [[TMP8]]
// Exercises vzip_u8: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector
// interleaves lanes 0-3 of a and b (a0, b0, a1, b1, ...), and the second
// interleaves lanes 4-7.
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_u8(uint8x8_t a,uint8x8_t b)1451*67e74705SXin Li uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
1452*67e74705SXin Li return vzip_u8(a, b);
1453*67e74705SXin Li }
1454*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) #0 {
1455*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1456*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1457*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1458*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1459*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1460*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1461*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1462*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1463*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1464*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1465*67e74705SXin Li // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1466*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1467*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1468*67e74705SXin Li // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
1469*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1470*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1471*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1472*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1473*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1474*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
1475*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1476*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1477*67e74705SXin Li // CHECK: ret %struct.uint16x4x2_t [[TMP12]]
// Exercises vzip_u16: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector is
// (a0, b0, a1, b1) and the second is (a2, b2, a3, b3).
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_u16(uint16x4_t a,uint16x4_t b)1478*67e74705SXin Li uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
1479*67e74705SXin Li return vzip_u16(a, b);
1480*67e74705SXin Li }
1481*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) #0 {
1482*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1483*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1484*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1485*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1486*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1487*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1488*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1489*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1490*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1491*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1492*67e74705SXin Li // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1493*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1494*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1495*67e74705SXin Li // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
1496*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1497*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1498*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1499*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1500*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1501*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
1502*67e74705SXin Li // CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1503*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1504*67e74705SXin Li // CHECK: ret %struct.uint32x2x2_t [[TMP12]]
// Exercises vzip_u32: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector is
// (a0, b0) and the second is (a1, b1).
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_u32(uint32x2_t a,uint32x2_t b)1505*67e74705SXin Li uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
1506*67e74705SXin Li return vzip_u32(a, b);
1507*67e74705SXin Li }
1508*67e74705SXin Li // CHECK-LABEL: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) #0 {
1509*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1510*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1511*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1512*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1513*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1514*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1515*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1516*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1517*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1518*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
1519*67e74705SXin Li // CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
1520*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1521*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
1522*67e74705SXin Li // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
1523*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1524*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1525*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1526*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1527*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1528*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
1529*67e74705SXin Li // CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
1530*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1531*67e74705SXin Li // CHECK: ret %struct.float32x2x2_t [[TMP12]]
// Exercises vzip_f32: passes a and b straight to the intrinsic and returns its
// two-vector result. Per the expected IR above, the first result vector is
// (a0, b0) and the second is (a1, b1).
// Keep the body a single direct call; the expectations above match the IR
// emitted for exactly this shape.
test_vzip_f32(float32x2_t a,float32x2_t b)1532*67e74705SXin Li float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
1533*67e74705SXin Li return vzip_f32(a, b);
1534*67e74705SXin Li }
1535*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) #0 {
1536*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1537*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1538*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1539*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1540*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1541*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1542*67e74705SXin Li // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1543*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1544*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1545*67e74705SXin Li // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1546*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1547*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1548*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1549*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1550*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1551*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1552*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1553*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1554*67e74705SXin Li // CHECK: ret %struct.poly8x8x2_t [[TMP8]]
// Test driver for vzip_p8: passes both poly8x8_t operands straight to the
// intrinsic and returns its poly8x8x2_t result unchanged. The directives above
// expect two <8 x i8> shufflevectors of %a and %b, with masks
// <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzip_p8(poly8x8_t a,poly8x8_t b)1555*67e74705SXin Li poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
1556*67e74705SXin Li return vzip_p8(a, b);
1557*67e74705SXin Li }
1558*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) #0 {
1559*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1560*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1561*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1562*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1563*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1564*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1565*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1566*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1567*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1568*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1569*67e74705SXin Li // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1570*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1571*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1572*67e74705SXin Li // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
1573*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1574*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1575*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1576*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1577*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1578*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
1579*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1580*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1581*67e74705SXin Li // CHECK: ret %struct.poly16x4x2_t [[TMP12]]
// Test driver for vzip_p16: passes both poly16x4_t operands straight to the
// intrinsic and returns its poly16x4x2_t result unchanged. The directives above
// expect the operands to be bitcast to <8 x i8> and back to <4 x i16>, followed
// by two shufflevectors with masks <0, 4, 1, 5> and <2, 6, 3, 7>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzip_p16(poly16x4_t a,poly16x4_t b)1582*67e74705SXin Li poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
1583*67e74705SXin Li return vzip_p16(a, b);
1584*67e74705SXin Li }
1585*67e74705SXin Li // CHECK-LABEL: define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
1586*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1587*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1588*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1589*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1590*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1591*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1592*67e74705SXin Li // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1593*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1594*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1595*67e74705SXin Li // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1596*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1597*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1598*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1599*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1600*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1601*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1602*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1603*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1604*67e74705SXin Li // CHECK: ret %struct.int8x16x2_t [[TMP8]]
// Test driver for vzipq_s8: passes both int8x16_t operands straight to the
// intrinsic and returns its int8x16x2_t result unchanged. The directives above
// expect two <16 x i8> shufflevectors of %a and %b: the first mask runs
// 0, 16, 1, 17, ... 7, 23 and the second runs 8, 24, 9, 25, ... 15, 31.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_s8(int8x16_t a,int8x16_t b)1605*67e74705SXin Li int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
1606*67e74705SXin Li return vzipq_s8(a, b);
1607*67e74705SXin Li }
1608*67e74705SXin Li // CHECK-LABEL: define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
1609*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1610*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1611*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1612*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1613*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1614*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1615*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1616*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1617*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1618*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1619*67e74705SXin Li // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1620*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1621*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1622*67e74705SXin Li // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
1623*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1624*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1625*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1626*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1627*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1628*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
1629*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1630*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1631*67e74705SXin Li // CHECK: ret %struct.int16x8x2_t [[TMP12]]
// Test driver for vzipq_s16: passes both int16x8_t operands straight to the
// intrinsic and returns its int16x8x2_t result unchanged. The directives above
// expect the operands to be bitcast to <16 x i8> and back to <8 x i16>,
// followed by two shufflevectors with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_s16(int16x8_t a,int16x8_t b)1632*67e74705SXin Li int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
1633*67e74705SXin Li return vzipq_s16(a, b);
1634*67e74705SXin Li }
1635*67e74705SXin Li // CHECK-LABEL: define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
1636*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1637*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1638*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1639*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1640*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1641*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1642*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1643*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1644*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1645*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1646*67e74705SXin Li // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1647*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1648*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1649*67e74705SXin Li // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
1650*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1651*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1652*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1653*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1654*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1655*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
1656*67e74705SXin Li // CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1657*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1658*67e74705SXin Li // CHECK: ret %struct.int32x4x2_t [[TMP12]]
// Test driver for vzipq_s32: passes both int32x4_t operands straight to the
// intrinsic and returns its int32x4x2_t result unchanged. The directives above
// expect the operands to be bitcast to <16 x i8> and back to <4 x i32>,
// followed by two shufflevectors with masks <0, 4, 1, 5> and <2, 6, 3, 7>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_s32(int32x4_t a,int32x4_t b)1659*67e74705SXin Li int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
1660*67e74705SXin Li return vzipq_s32(a, b);
1661*67e74705SXin Li }
1662*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
1663*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1664*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1665*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1666*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1667*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1668*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1669*67e74705SXin Li // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1670*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1671*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1672*67e74705SXin Li // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1673*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1674*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1675*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1676*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1677*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1678*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1679*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1680*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1681*67e74705SXin Li // CHECK: ret %struct.uint8x16x2_t [[TMP8]]
// Test driver for vzipq_u8: passes both uint8x16_t operands straight to the
// intrinsic and returns its uint8x16x2_t result unchanged. The directives above
// expect two <16 x i8> shufflevectors of %a and %b: the first mask runs
// 0, 16, 1, 17, ... 7, 23 and the second runs 8, 24, 9, 25, ... 15, 31.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_u8(uint8x16_t a,uint8x16_t b)1682*67e74705SXin Li uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
1683*67e74705SXin Li return vzipq_u8(a, b);
1684*67e74705SXin Li }
1685*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
1686*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1687*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1688*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1689*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1690*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1691*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1692*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1693*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1694*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1695*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1696*67e74705SXin Li // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1697*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1698*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1699*67e74705SXin Li // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
1700*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1701*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1702*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1703*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1704*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1705*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
1706*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1707*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1708*67e74705SXin Li // CHECK: ret %struct.uint16x8x2_t [[TMP12]]
// Test driver for vzipq_u16: passes both uint16x8_t operands straight to the
// intrinsic and returns its uint16x8x2_t result unchanged. The directives above
// expect the operands to be bitcast to <16 x i8> and back to <8 x i16>,
// followed by two shufflevectors with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_u16(uint16x8_t a,uint16x8_t b)1709*67e74705SXin Li uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
1710*67e74705SXin Li return vzipq_u16(a, b);
1711*67e74705SXin Li }
1712*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
1713*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1714*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1715*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1716*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1717*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1718*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1719*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1720*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1721*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1722*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1723*67e74705SXin Li // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1724*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1725*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1726*67e74705SXin Li // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
1727*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1728*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1729*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1730*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1731*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1732*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
1733*67e74705SXin Li // CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
1734*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1735*67e74705SXin Li // CHECK: ret %struct.uint32x4x2_t [[TMP12]]
// Test driver for vzipq_u32: passes both uint32x4_t operands straight to the
// intrinsic and returns its uint32x4x2_t result unchanged. The directives above
// expect the operands to be bitcast to <16 x i8> and back to <4 x i32>,
// followed by two shufflevectors with masks <0, 4, 1, 5> and <2, 6, 3, 7>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_u32(uint32x4_t a,uint32x4_t b)1736*67e74705SXin Li uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
1737*67e74705SXin Li return vzipq_u32(a, b);
1738*67e74705SXin Li }
1739*67e74705SXin Li // CHECK-LABEL: define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) #0 {
1740*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1741*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1742*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1743*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1744*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1745*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1746*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1747*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1748*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1749*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1750*67e74705SXin Li // CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
1751*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1752*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1753*67e74705SXin Li // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
1754*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1755*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1756*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1757*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1758*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1759*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
1760*67e74705SXin Li // CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
1761*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1762*67e74705SXin Li // CHECK: ret %struct.float32x4x2_t [[TMP12]]
// Test driver for vzipq_f32: passes both float32x4_t operands straight to the
// intrinsic and returns its float32x4x2_t result unchanged. The directives
// above expect the operands to be bitcast to <16 x i8> and back to
// <4 x float>, followed by two shufflevectors with masks <0, 4, 1, 5> and
// <2, 6, 3, 7>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_f32(float32x4_t a,float32x4_t b)1763*67e74705SXin Li float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
1764*67e74705SXin Li return vzipq_f32(a, b);
1765*67e74705SXin Li }
1766*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
1767*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1768*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1769*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1770*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1771*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1772*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1773*67e74705SXin Li // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1774*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1775*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1776*67e74705SXin Li // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1777*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1778*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1779*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1780*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1781*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1782*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1783*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1784*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1785*67e74705SXin Li // CHECK: ret %struct.poly8x16x2_t [[TMP8]]
// Test driver for vzipq_p8: passes both poly8x16_t operands straight to the
// intrinsic and returns its poly8x16x2_t result unchanged. The directives above
// expect two <16 x i8> shufflevectors of %a and %b: the first mask runs
// 0, 16, 1, 17, ... 7, 23 and the second runs 8, 24, 9, 25, ... 15, 31.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_p8(poly8x16_t a,poly8x16_t b)1786*67e74705SXin Li poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
1787*67e74705SXin Li return vzipq_p8(a, b);
1788*67e74705SXin Li }
1789*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
1790*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1791*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1792*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1793*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1794*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1795*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1796*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1797*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1798*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1799*67e74705SXin Li // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1800*67e74705SXin Li // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1801*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1802*67e74705SXin Li // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1803*67e74705SXin Li // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
1804*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1805*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1806*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
1807*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1808*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1809*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
1810*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
1811*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1812*67e74705SXin Li // CHECK: ret %struct.poly16x8x2_t [[TMP12]]
// Test driver for vzipq_p16: passes both poly16x8_t operands straight to the
// intrinsic and returns its poly16x8x2_t result unchanged. The directives above
// expect the operands to be bitcast to <16 x i8> and back to <8 x i16>,
// followed by two shufflevectors with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>.
// Keep the body a single return; the expected IR is matched against this form.
test_vzipq_p16(poly16x8_t a,poly16x8_t b)1813*67e74705SXin Li poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
1814*67e74705SXin Li return vzipq_p16(a, b);
1815*67e74705SXin Li }
1816*67e74705SXin Li
1817*67e74705SXin Li // CHECK-LABEL: define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
1818*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1819*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1820*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1821*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1822*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1823*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1824*67e74705SXin Li // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1825*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1826*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1827*67e74705SXin Li // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1828*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1829*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1830*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1831*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1832*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1833*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1834*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1835*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1836*67e74705SXin Li // CHECK: ret %struct.int8x8x2_t [[TMP8]]
// Test driver for vtrn_s8: passes both int8x8_t operands straight to the
// intrinsic and returns its int8x8x2_t result unchanged. The directives above
// expect two <8 x i8> shufflevectors of %a and %b, with masks
// <0, 8, 2, 10, 4, 12, 6, 14> and <1, 9, 3, 11, 5, 13, 7, 15>.
// Keep the body a single return; the expected IR is matched against this form.
test_vtrn_s8(int8x8_t a,int8x8_t b)1837*67e74705SXin Li int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
1838*67e74705SXin Li return vtrn_s8(a, b);
1839*67e74705SXin Li }
1840*67e74705SXin Li
1841*67e74705SXin Li // CHECK-LABEL: define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
1842*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1843*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1844*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1845*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1846*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1847*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1848*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1849*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1850*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1851*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1852*67e74705SXin Li // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1853*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1854*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1855*67e74705SXin Li // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
1856*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1857*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1858*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1859*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1860*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1861*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
1862*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1863*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1864*67e74705SXin Li // CHECK: ret %struct.int16x4x2_t [[TMP12]]
// Test driver for vtrn_s16: passes both int16x4_t operands straight to the
// intrinsic and returns its int16x4x2_t result unchanged. The directives above
// expect the operands to be bitcast to <8 x i8> and back to <4 x i16>, followed
// by two shufflevectors with masks <0, 4, 2, 6> and <1, 5, 3, 7>.
// Keep the body a single return; the expected IR is matched against this form.
test_vtrn_s16(int16x4_t a,int16x4_t b)1865*67e74705SXin Li int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
1866*67e74705SXin Li return vtrn_s16(a, b);
1867*67e74705SXin Li }
1868*67e74705SXin Li // CHECK-LABEL: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
1869*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1870*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1871*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1872*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1873*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1874*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1875*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1876*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1877*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1878*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1879*67e74705SXin Li // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1880*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1881*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1882*67e74705SXin Li // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
1883*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1884*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1885*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1886*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1887*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1888*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
1889*67e74705SXin Li // CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1890*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1891*67e74705SXin Li // CHECK: ret %struct.int32x2x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrn_s32 and returns its int32x2x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <2 x i32>, with masks <0, 2> and <1, 3>, stored as the first and second
// vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_s32(int32x2_t a,int32x2_t b)1892*67e74705SXin Li int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
1893*67e74705SXin Li return vtrn_s32(a, b);
1894*67e74705SXin Li }
1895*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
1896*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1897*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1898*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1899*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1900*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1901*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1902*67e74705SXin Li // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1903*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1904*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1905*67e74705SXin Li // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1906*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1907*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1908*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1909*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1910*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1911*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1912*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1913*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1914*67e74705SXin Li // CHECK: ret %struct.uint8x8x2_t [[TMP8]]
// Test wrapper: forwards a and b to vtrn_u8 and returns its uint8x8x2_t result.
// The FileCheck expectations above require two shufflevectors of %a and %b as
// <8 x i8>, with masks <0, 8, 2, 10, 4, 12, 6, 14> and
// <1, 9, 3, 11, 5, 13, 7, 15>, stored as the first and second vectors of the
// returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_u8(uint8x8_t a,uint8x8_t b)1915*67e74705SXin Li uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
1916*67e74705SXin Li return vtrn_u8(a, b);
1917*67e74705SXin Li }
1918*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
1919*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1920*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1921*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1922*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1923*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1924*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1925*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1926*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1927*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1928*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1929*67e74705SXin Li // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1930*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1931*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1932*67e74705SXin Li // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
1933*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1934*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1935*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1936*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1937*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1938*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
1939*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
1940*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1941*67e74705SXin Li // CHECK: ret %struct.uint16x4x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrn_u16 and returns its uint16x4x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <4 x i16>, with masks <0, 4, 2, 6> and <1, 5, 3, 7>, stored as the first
// and second vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_u16(uint16x4_t a,uint16x4_t b)1942*67e74705SXin Li uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
1943*67e74705SXin Li return vtrn_u16(a, b);
1944*67e74705SXin Li }
1945*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
1946*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1947*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1948*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1949*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1950*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1951*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1952*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1953*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1954*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1955*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
1956*67e74705SXin Li // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1957*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1958*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
1959*67e74705SXin Li // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
1960*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1961*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1962*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1963*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1964*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1965*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
1966*67e74705SXin Li // CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
1967*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1968*67e74705SXin Li // CHECK: ret %struct.uint32x2x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrn_u32 and returns its uint32x2x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <2 x i32>, with masks <0, 2> and <1, 3>, stored as the first and second
// vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_u32(uint32x2_t a,uint32x2_t b)1969*67e74705SXin Li uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
1970*67e74705SXin Li return vtrn_u32(a, b);
1971*67e74705SXin Li }
1972*67e74705SXin Li // CHECK-LABEL: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) #0 {
1973*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1974*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1975*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1976*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1977*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1978*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1979*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1980*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1981*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1982*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
1983*67e74705SXin Li // CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
1984*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1985*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
1986*67e74705SXin Li // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]]
1987*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1988*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1989*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
1990*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1991*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1992*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
1993*67e74705SXin Li // CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
1994*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1995*67e74705SXin Li // CHECK: ret %struct.float32x2x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrn_f32 and returns its float32x2x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <2 x float>, with masks <0, 2> and <1, 3>, stored as the first and second
// vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_f32(float32x2_t a,float32x2_t b)1996*67e74705SXin Li float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
1997*67e74705SXin Li return vtrn_f32(a, b);
1998*67e74705SXin Li }
1999*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) #0 {
2000*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
2001*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
2002*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
2003*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
2004*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
2005*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2006*67e74705SXin Li // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
2007*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
2008*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2009*67e74705SXin Li // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
2010*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
2011*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
2012*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
2013*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
2014*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
2015*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
2016*67e74705SXin Li // CHECK: store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
2017*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
2018*67e74705SXin Li // CHECK: ret %struct.poly8x8x2_t [[TMP8]]
// Test wrapper: forwards a and b to vtrn_p8 and returns its poly8x8x2_t result.
// The FileCheck expectations above require two shufflevectors of %a and %b as
// <8 x i8>, with masks <0, 8, 2, 10, 4, 12, 6, 14> and
// <1, 9, 3, 11, 5, 13, 7, 15>, stored as the first and second vectors of the
// returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_p8(poly8x8_t a,poly8x8_t b)2019*67e74705SXin Li poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
2020*67e74705SXin Li return vtrn_p8(a, b);
2021*67e74705SXin Li }
2022*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) #0 {
2023*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
2024*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
2025*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
2026*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
2027*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2028*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2029*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
2030*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2031*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
2032*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2033*67e74705SXin Li // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
2034*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
2035*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2036*67e74705SXin Li // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
2037*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
2038*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
2039*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
2040*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
2041*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
2042*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
2043*67e74705SXin Li // CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
2044*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
2045*67e74705SXin Li // CHECK: ret %struct.poly16x4x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrn_p16 and returns its poly16x4x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <4 x i16>, with masks <0, 4, 2, 6> and <1, 5, 3, 7>, stored as the first
// and second vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrn_p16(poly16x4_t a,poly16x4_t b)2046*67e74705SXin Li poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
2047*67e74705SXin Li return vtrn_p16(a, b);
2048*67e74705SXin Li }
2049*67e74705SXin Li // CHECK-LABEL: define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
2050*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
2051*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
2052*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
2053*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
2054*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2055*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2056*67e74705SXin Li // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2057*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2058*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2059*67e74705SXin Li // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2060*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
2061*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
2062*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2063*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
2064*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
2065*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
2066*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2067*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
2068*67e74705SXin Li // CHECK: ret %struct.int8x16x2_t [[TMP8]]
// Test wrapper: forwards a and b to vtrnq_s8 and returns its int8x16x2_t result.
// The FileCheck expectations above require two shufflevectors of %a and %b as
// <16 x i8>, with masks <0, 16, 2, 18, ..., 14, 30> and
// <1, 17, 3, 19, ..., 15, 31>, stored as the first and second vectors of the
// returned struct (16-byte aligned, 32 bytes copied).
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrnq_s8(int8x16_t a,int8x16_t b)2069*67e74705SXin Li int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
2070*67e74705SXin Li return vtrnq_s8(a, b);
2071*67e74705SXin Li }
2072*67e74705SXin Li // CHECK-LABEL: define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
2073*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
2074*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
2075*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
2076*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
2077*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2078*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2079*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2080*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2081*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
2082*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2083*67e74705SXin Li // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2084*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2085*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2086*67e74705SXin Li // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
2087*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
2088*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
2089*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2090*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
2091*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
2092*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
2093*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
2094*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
2095*67e74705SXin Li // CHECK: ret %struct.int16x8x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrnq_s16 and returns its int16x8x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <8 x i16>, with masks <0, 8, 2, 10, 4, 12, 6, 14> and
// <1, 9, 3, 11, 5, 13, 7, 15>, stored as the first and second vectors of the
// returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrnq_s16(int16x8_t a,int16x8_t b)2096*67e74705SXin Li int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
2097*67e74705SXin Li return vtrnq_s16(a, b);
2098*67e74705SXin Li }
2099*67e74705SXin Li // CHECK-LABEL: define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
2100*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
2101*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
2102*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
2103*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
2104*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2105*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2106*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
2107*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2108*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2109*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2110*67e74705SXin Li // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
2111*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
2112*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2113*67e74705SXin Li // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
2114*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
2115*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
2116*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2117*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
2118*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
2119*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
2120*67e74705SXin Li // CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
2121*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
2122*67e74705SXin Li // CHECK: ret %struct.int32x4x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrnq_s32 and returns its int32x4x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <4 x i32>, with masks <0, 4, 2, 6> and <1, 5, 3, 7>, stored as the first
// and second vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrnq_s32(int32x4_t a,int32x4_t b)2123*67e74705SXin Li int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
2124*67e74705SXin Li return vtrnq_s32(a, b);
2125*67e74705SXin Li }
2126*67e74705SXin Li // CHECK-LABEL: define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
2127*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
2128*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
2129*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
2130*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
2131*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2132*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2133*67e74705SXin Li // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2134*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2135*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2136*67e74705SXin Li // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2137*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
2138*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
2139*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2140*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
2141*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
2142*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
2143*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2144*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
2145*67e74705SXin Li // CHECK: ret %struct.uint8x16x2_t [[TMP8]]
// Test wrapper: forwards a and b to vtrnq_u8 and returns its uint8x16x2_t result.
// The FileCheck expectations above require two shufflevectors of %a and %b as
// <16 x i8>, with masks <0, 16, 2, 18, ..., 14, 30> and
// <1, 17, 3, 19, ..., 15, 31>, stored as the first and second vectors of the
// returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrnq_u8(uint8x16_t a,uint8x16_t b)2146*67e74705SXin Li uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
2147*67e74705SXin Li return vtrnq_u8(a, b);
2148*67e74705SXin Li }
2149*67e74705SXin Li // CHECK-LABEL: define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
2150*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
2151*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
2152*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
2153*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
2154*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2155*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2156*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2157*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2158*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
2159*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2160*67e74705SXin Li // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2161*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2162*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2163*67e74705SXin Li // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
2164*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
2165*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
2166*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2167*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
2168*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
2169*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
2170*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
2171*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
2172*67e74705SXin Li // CHECK: ret %struct.uint16x8x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrnq_u16 and returns its uint16x8x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <8 x i16>, with masks <0, 8, 2, 10, 4, 12, 6, 14> and
// <1, 9, 3, 11, 5, 13, 7, 15>, stored as the first and second vectors of the
// returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrnq_u16(uint16x8_t a,uint16x8_t b)2173*67e74705SXin Li uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
2174*67e74705SXin Li return vtrnq_u16(a, b);
2175*67e74705SXin Li }
2176*67e74705SXin Li // CHECK-LABEL: define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
2177*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
2178*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
2179*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
2180*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
2181*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2182*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2183*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
2184*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2185*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2186*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2187*67e74705SXin Li // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
2188*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
2189*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2190*67e74705SXin Li // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
2191*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
2192*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
2193*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2194*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
2195*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
2196*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
2197*67e74705SXin Li // CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
2198*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
2199*67e74705SXin Li // CHECK: ret %struct.uint32x4x2_t [[TMP12]]
// Test wrapper: forwards a and b to vtrnq_u32 and returns its uint32x4x2_t result.
// The FileCheck expectations above require two shufflevectors combining a and b
// as <4 x i32>, with masks <0, 4, 2, 6> and <1, 5, 3, 7>, stored as the first
// and second vectors of the returned struct.
// NOTE(review): the text ahead of the return type (and the number/hash/author
// prefix on each line) looks like code-viewer blame residue, not C - confirm
// against the upstream test.
test_vtrnq_u32(uint32x4_t a,uint32x4_t b)2200*67e74705SXin Li uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
2201*67e74705SXin Li return vtrnq_u32(a, b);
2202*67e74705SXin Li }
// test_vtrnq_f32: expected IR for vtrnq_f32 on two <4 x float> vectors.
// The lit command at the top of the file pipes clang's output through
// `opt -S -mem2reg` before FileCheck sees it, so the directives below
// describe the IR after that pass.
// Expected shape, in order:
//   - three allocas of %struct.float32x4x2_t (16-byte aligned);
//   - %a and %b are bitcast to <16 x i8> and back to <4 x float>;
//   - a shufflevector with mask <0, 4, 2, 6> is stored to the first
//     <4 x float> slot of the result temporary, and one with mask
//     <1, 5, 3, 7> to the slot at index 1 (mask indices 4..7 select
//     lanes of the second operand);
//   - the 32-byte struct is copied with llvm.memcpy, reloaded, and
//     returned by value.
2203*67e74705SXin Li // CHECK-LABEL: define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) #0 {
2204*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
2205*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
2206*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
2207*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
2208*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
2209*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
2210*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
2211*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
2212*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
2213*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2214*67e74705SXin Li // CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
2215*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
2216*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2217*67e74705SXin Li // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]]
2218*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
2219*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
2220*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2221*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
2222*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
2223*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
2224*67e74705SXin Li // CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
2225*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
2226*67e74705SXin Li // CHECK: ret %struct.float32x4x2_t [[TMP12]]
test_vtrnq_f32(float32x4_t a,float32x4_t b)2227*67e74705SXin Li float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
2228*67e74705SXin Li return vtrnq_f32(a, b);
2229*67e74705SXin Li }
// test_vtrnq_p8: expected IR for vtrnq_p8 on two <16 x i8> vectors.
// The lit command at the top of the file pipes clang's output through
// `opt -S -mem2reg` before FileCheck sees it, so the directives below
// describe the IR after that pass.
// Unlike the wider-element variants in this file, no argument bitcasts
// are expected here: both shufflevectors operate directly on %a and %b.
// Expected shape, in order:
//   - three allocas of %struct.poly8x16x2_t (16-byte aligned);
//   - a shufflevector with mask <0, 16, 2, 18, ..., 14, 30> is stored to
//     the first <16 x i8> slot of the result temporary, and one with mask
//     <1, 17, 3, 19, ..., 15, 31> to the slot at index 1 (mask indices
//     16..31 select lanes of the second operand);
//   - the 32-byte struct is copied with llvm.memcpy, reloaded, and
//     returned by value.
2230*67e74705SXin Li // CHECK-LABEL: define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
2231*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2232*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2233*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
2234*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
2235*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2236*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2237*67e74705SXin Li // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2238*67e74705SXin Li // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2239*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2240*67e74705SXin Li // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2241*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
2242*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
2243*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2244*67e74705SXin Li // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
2245*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
2246*67e74705SXin Li // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
2247*67e74705SXin Li // CHECK: store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2248*67e74705SXin Li // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
2249*67e74705SXin Li // CHECK: ret %struct.poly8x16x2_t [[TMP8]]
test_vtrnq_p8(poly8x16_t a,poly8x16_t b)2250*67e74705SXin Li poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
2251*67e74705SXin Li return vtrnq_p8(a, b);
2252*67e74705SXin Li }
// test_vtrnq_p16: expected IR for vtrnq_p16 on two <8 x i16> vectors.
// The lit command at the top of the file pipes clang's output through
// `opt -S -mem2reg` before FileCheck sees it, so the directives below
// describe the IR after that pass.
// Expected shape, in order:
//   - three allocas of %struct.poly16x8x2_t (16-byte aligned);
//   - %a and %b are bitcast to <16 x i8> and back to <8 x i16>;
//   - a shufflevector with mask <0, 8, 2, 10, 4, 12, 6, 14> is stored to
//     the first <8 x i16> slot of the result temporary, and one with mask
//     <1, 9, 3, 11, 5, 13, 7, 15> to the slot at index 1 (mask indices
//     8..15 select lanes of the second operand);
//   - the 32-byte struct is copied with llvm.memcpy, reloaded, and
//     returned by value.
2253*67e74705SXin Li // CHECK-LABEL: define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
2254*67e74705SXin Li // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2255*67e74705SXin Li // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2256*67e74705SXin Li // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
2257*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
2258*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2259*67e74705SXin Li // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2260*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2261*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2262*67e74705SXin Li // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
2263*67e74705SXin Li // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2264*67e74705SXin Li // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2265*67e74705SXin Li // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2266*67e74705SXin Li // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2267*67e74705SXin Li // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
2268*67e74705SXin Li // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
2269*67e74705SXin Li // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
2270*67e74705SXin Li // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
2271*67e74705SXin Li // CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
2272*67e74705SXin Li // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
2273*67e74705SXin Li // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
2274*67e74705SXin Li // CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
2275*67e74705SXin Li // CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
2276*67e74705SXin Li // CHECK: ret %struct.poly16x8x2_t [[TMP12]]
test_vtrnq_p16(poly16x8_t a,poly16x8_t b)2277*67e74705SXin Li poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
2278*67e74705SXin Li return vtrnq_p16(a, b);
2279*67e74705SXin Li }
2280