// xref: /aosp_15_r20/external/clang/test/CodeGen/aarch64-poly64.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:  -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \
// RUN:  | FileCheck %s

// Test new aarch64 intrinsics with poly64

#include <arm_neon.h>

9*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vceq_p64(<1 x i64> %a, <1 x i64> %b) #0 {
10*67e74705SXin Li // CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
11*67e74705SXin Li // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
12*67e74705SXin Li // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vceq_p64(poly64x1_t a,poly64x1_t b)13*67e74705SXin Li uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
14*67e74705SXin Li   return vceq_p64(a, b);
15*67e74705SXin Li }
16*67e74705SXin Li 
17*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
18*67e74705SXin Li // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
19*67e74705SXin Li // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
20*67e74705SXin Li // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_p64(poly64x2_t a,poly64x2_t b)21*67e74705SXin Li uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
22*67e74705SXin Li   return vceqq_p64(a, b);
23*67e74705SXin Li }
24*67e74705SXin Li 
25*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 {
26*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
27*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
28*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
29*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
30*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
31*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
32*67e74705SXin Li // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
33*67e74705SXin Li // CHECK:   ret <1 x i64> [[VTST_I]]
test_vtst_p64(poly64x1_t a,poly64x1_t b)34*67e74705SXin Li uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
35*67e74705SXin Li   return vtst_p64(a, b);
36*67e74705SXin Li }
37*67e74705SXin Li 
38*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
39*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
40*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
41*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
42*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
43*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
44*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
45*67e74705SXin Li // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
46*67e74705SXin Li // CHECK:   ret <2 x i64> [[VTST_I]]
test_vtstq_p64(poly64x2_t a,poly64x2_t b)47*67e74705SXin Li uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
48*67e74705SXin Li   return vtstq_p64(a, b);
49*67e74705SXin Li }
50*67e74705SXin Li 
51*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
52*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
53*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
54*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
55*67e74705SXin Li // CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
56*67e74705SXin Li // CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
57*67e74705SXin Li // CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
58*67e74705SXin Li // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
59*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
60*67e74705SXin Li // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
61*67e74705SXin Li // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
62*67e74705SXin Li // CHECK:   ret <1 x i64> [[VBSL5_I]]
test_vbsl_p64(poly64x1_t a,poly64x1_t b,poly64x1_t c)63*67e74705SXin Li poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
64*67e74705SXin Li   return vbsl_p64(a, b, c);
65*67e74705SXin Li }
66*67e74705SXin Li 
67*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
68*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
69*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
70*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
71*67e74705SXin Li // CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
72*67e74705SXin Li // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
73*67e74705SXin Li // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
74*67e74705SXin Li // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
75*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
76*67e74705SXin Li // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
77*67e74705SXin Li // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
78*67e74705SXin Li // CHECK:   ret <2 x i64> [[VBSL5_I]]
test_vbslq_p64(poly64x2_t a,poly64x2_t b,poly64x2_t c)79*67e74705SXin Li poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
80*67e74705SXin Li   return vbslq_p64(a, b, c);
81*67e74705SXin Li }
82*67e74705SXin Li 
83*67e74705SXin Li // CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
84*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
85*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
86*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
87*67e74705SXin Li // CHECK:   ret i64 [[VGET_LANE]]
test_vget_lane_p64(poly64x1_t v)88*67e74705SXin Li poly64_t test_vget_lane_p64(poly64x1_t v) {
89*67e74705SXin Li   return vget_lane_p64(v, 0);
90*67e74705SXin Li }
91*67e74705SXin Li 
92*67e74705SXin Li // CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #0 {
93*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
94*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
95*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
96*67e74705SXin Li // CHECK:   ret i64 [[VGETQ_LANE]]
test_vgetq_lane_p64(poly64x2_t v)97*67e74705SXin Li poly64_t test_vgetq_lane_p64(poly64x2_t v) {
98*67e74705SXin Li   return vgetq_lane_p64(v, 1);
99*67e74705SXin Li }
100*67e74705SXin Li 
101*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
102*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
103*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
104*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
105*67e74705SXin Li // CHECK:   ret <1 x i64> [[VSET_LANE]]
test_vset_lane_p64(poly64_t a,poly64x1_t v)106*67e74705SXin Li poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
107*67e74705SXin Li   return vset_lane_p64(a, v, 0);
108*67e74705SXin Li }
109*67e74705SXin Li 
110*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #0 {
111*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
112*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
113*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
114*67e74705SXin Li // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_p64(poly64_t a,poly64x2_t v)115*67e74705SXin Li poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
116*67e74705SXin Li   return vsetq_lane_p64(a, v, 1);
117*67e74705SXin Li }
118*67e74705SXin Li 
119*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
120*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
121*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
122*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
123*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
124*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
125*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0
126*67e74705SXin Li // CHECK:   ret <1 x i64> [[VSET_LANE]]
test_vcopy_lane_p64(poly64x1_t a,poly64x1_t b)127*67e74705SXin Li poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
128*67e74705SXin Li   return vcopy_lane_p64(a, 0, b, 0);
129*67e74705SXin Li 
130*67e74705SXin Li }
131*67e74705SXin Li 
132*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
133*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
134*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
135*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
136*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
137*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
138*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1
139*67e74705SXin Li // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vcopyq_lane_p64(poly64x2_t a,poly64x1_t b)140*67e74705SXin Li poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
141*67e74705SXin Li   return vcopyq_lane_p64(a, 1, b, 0);
142*67e74705SXin Li }
143*67e74705SXin Li 
144*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
145*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
146*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
147*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
148*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
149*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
150*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1
151*67e74705SXin Li // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vcopyq_laneq_p64(poly64x2_t a,poly64x2_t b)152*67e74705SXin Li poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
153*67e74705SXin Li   return vcopyq_laneq_p64(a, 1, b, 1);
154*67e74705SXin Li }
155*67e74705SXin Li 
156*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vcreate_p64(i64 %a) #0 {
157*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
158*67e74705SXin Li // CHECK:   ret <1 x i64> [[TMP0]]
test_vcreate_p64(uint64_t a)159*67e74705SXin Li poly64x1_t test_vcreate_p64(uint64_t a) {
160*67e74705SXin Li   return vcreate_p64(a);
161*67e74705SXin Li }
162*67e74705SXin Li 
163*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vdup_n_p64(i64 %a) #0 {
164*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
165*67e74705SXin Li // CHECK:   ret <1 x i64> [[VECINIT_I]]
test_vdup_n_p64(poly64_t a)166*67e74705SXin Li poly64x1_t test_vdup_n_p64(poly64_t a) {
167*67e74705SXin Li   return vdup_n_p64(a);
168*67e74705SXin Li }
169*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #0 {
170*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
171*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
172*67e74705SXin Li // CHECK:   ret <2 x i64> [[VECINIT1_I]]
test_vdupq_n_p64(poly64_t a)173*67e74705SXin Li poly64x2_t test_vdupq_n_p64(poly64_t a) {
174*67e74705SXin Li   return vdupq_n_p64(a);
175*67e74705SXin Li }
176*67e74705SXin Li 
177*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vmov_n_p64(i64 %a) #0 {
178*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
179*67e74705SXin Li // CHECK:   ret <1 x i64> [[VECINIT_I]]
test_vmov_n_p64(poly64_t a)180*67e74705SXin Li poly64x1_t test_vmov_n_p64(poly64_t a) {
181*67e74705SXin Li   return vmov_n_p64(a);
182*67e74705SXin Li }
183*67e74705SXin Li 
184*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #0 {
185*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
186*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
187*67e74705SXin Li // CHECK:   ret <2 x i64> [[VECINIT1_I]]
test_vmovq_n_p64(poly64_t a)188*67e74705SXin Li poly64x2_t test_vmovq_n_p64(poly64_t a) {
189*67e74705SXin Li   return vmovq_n_p64(a);
190*67e74705SXin Li }
191*67e74705SXin Li 
192*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
193*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <1 x i32> zeroinitializer
194*67e74705SXin Li // CHECK:   ret <1 x i64> [[SHUFFLE]]
test_vdup_lane_p64(poly64x1_t vec)195*67e74705SXin Li poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
196*67e74705SXin Li   return vdup_lane_p64(vec, 0);
197*67e74705SXin Li }
198*67e74705SXin Li 
199*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #0 {
200*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
201*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE]]
test_vdupq_lane_p64(poly64x1_t vec)202*67e74705SXin Li poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
203*67e74705SXin Li   return vdupq_lane_p64(vec, 0);
204*67e74705SXin Li }
205*67e74705SXin Li 
206*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #0 {
207*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
208*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE]]
test_vdupq_laneq_p64(poly64x2_t vec)209*67e74705SXin Li poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
210*67e74705SXin Li   return vdupq_laneq_p64(vec, 1);
211*67e74705SXin Li }
212*67e74705SXin Li 
213*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #0 {
214*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
215*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vcombine_p64(poly64x1_t low,poly64x1_t high)216*67e74705SXin Li poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
217*67e74705SXin Li   return vcombine_p64(low, high);
218*67e74705SXin Li }
219*67e74705SXin Li 
220*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vld1_p64(i64* %ptr) #0 {
221*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast i64* %ptr to i8*
222*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
223*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
224*67e74705SXin Li // CHECK:   ret <1 x i64> [[TMP2]]
test_vld1_p64(poly64_t const * ptr)225*67e74705SXin Li poly64x1_t test_vld1_p64(poly64_t const * ptr) {
226*67e74705SXin Li   return vld1_p64(ptr);
227*67e74705SXin Li }
228*67e74705SXin Li 
229*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #0 {
230*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast i64* %ptr to i8*
231*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
232*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
233*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
test_vld1q_p64(poly64_t const * ptr)234*67e74705SXin Li poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
235*67e74705SXin Li   return vld1q_p64(ptr);
236*67e74705SXin Li }
237*67e74705SXin Li 
238*67e74705SXin Li // CHECK-LABEL: define void @test_vst1_p64(i64* %ptr, <1 x i64> %val) #0 {
239*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast i64* %ptr to i8*
240*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
241*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
242*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
243*67e74705SXin Li // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
244*67e74705SXin Li // CHECK:   ret void
test_vst1_p64(poly64_t * ptr,poly64x1_t val)245*67e74705SXin Li void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
246*67e74705SXin Li   return vst1_p64(ptr, val);
247*67e74705SXin Li }
248*67e74705SXin Li 
249*67e74705SXin Li // CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #0 {
250*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast i64* %ptr to i8*
251*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
252*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
253*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
254*67e74705SXin Li // CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
255*67e74705SXin Li // CHECK:   ret void
test_vst1q_p64(poly64_t * ptr,poly64x2_t val)256*67e74705SXin Li void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
257*67e74705SXin Li   return vst1q_p64(ptr, val);
258*67e74705SXin Li }
259*67e74705SXin Li 
260*67e74705SXin Li // CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #0 {
261*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
262*67e74705SXin Li // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
263*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
264*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i64* %ptr to i8*
265*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
266*67e74705SXin Li // CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
267*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
268*67e74705SXin Li // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
269*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
270*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
271*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
272*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
273*67e74705SXin Li // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
test_vld2_p64(poly64_t const * ptr)274*67e74705SXin Li poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
275*67e74705SXin Li   return vld2_p64(ptr);
276*67e74705SXin Li }
277*67e74705SXin Li 
278*67e74705SXin Li // CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #0 {
279*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
280*67e74705SXin Li // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
281*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
282*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i64* %ptr to i8*
283*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
284*67e74705SXin Li // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
285*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
286*67e74705SXin Li // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
287*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
288*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
289*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
290*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
291*67e74705SXin Li // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
test_vld2q_p64(poly64_t const * ptr)292*67e74705SXin Li poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
293*67e74705SXin Li   return vld2q_p64(ptr);
294*67e74705SXin Li }
295*67e74705SXin Li 
296*67e74705SXin Li // CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #0 {
297*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
298*67e74705SXin Li // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
299*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
300*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i64* %ptr to i8*
301*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
302*67e74705SXin Li // CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
303*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
304*67e74705SXin Li // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
305*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
306*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
307*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
308*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
309*67e74705SXin Li // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
test_vld3_p64(poly64_t const * ptr)310*67e74705SXin Li poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
311*67e74705SXin Li   return vld3_p64(ptr);
312*67e74705SXin Li }
313*67e74705SXin Li 
314*67e74705SXin Li // CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #0 {
315*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
316*67e74705SXin Li // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
317*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
318*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i64* %ptr to i8*
319*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
320*67e74705SXin Li // CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
321*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
322*67e74705SXin Li // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
323*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
324*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
325*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
326*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
327*67e74705SXin Li // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
test_vld3q_p64(poly64_t const * ptr)328*67e74705SXin Li poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
329*67e74705SXin Li   return vld3q_p64(ptr);
330*67e74705SXin Li }
331*67e74705SXin Li 
332*67e74705SXin Li // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #0 {
333*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
334*67e74705SXin Li // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
335*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
336*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i64* %ptr to i8*
337*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
338*67e74705SXin Li // CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
339*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
340*67e74705SXin Li // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
341*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
342*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
343*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
344*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
345*67e74705SXin Li // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
test_vld4_p64(poly64_t const * ptr)346*67e74705SXin Li poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
347*67e74705SXin Li   return vld4_p64(ptr);
348*67e74705SXin Li }
349*67e74705SXin Li 
350*67e74705SXin Li // CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #0 {
351*67e74705SXin Li // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
352*67e74705SXin Li // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
353*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
354*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast i64* %ptr to i8*
355*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
356*67e74705SXin Li // CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
357*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
358*67e74705SXin Li // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
359*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
360*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
361*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
362*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
363*67e74705SXin Li // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
test_vld4q_p64(poly64_t const * ptr)364*67e74705SXin Li poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
365*67e74705SXin Li   return vld4q_p64(ptr);
366*67e74705SXin Li }
367*67e74705SXin Li 
368*67e74705SXin Li // CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #0 {
369*67e74705SXin Li // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
370*67e74705SXin Li // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
371*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
372*67e74705SXin Li // CHECK:   store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
373*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
374*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8*
375*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
376*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
377*67e74705SXin Li // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
378*67e74705SXin Li // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
379*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
380*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
381*67e74705SXin Li // CHECK:   [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
382*67e74705SXin Li // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL2]], i64 0, i64 1
383*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
384*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
385*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
386*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
387*67e74705SXin Li // CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
388*67e74705SXin Li // CHECK:   ret void
test_vst2_p64(poly64_t * ptr,poly64x1x2_t val)389*67e74705SXin Li void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
390*67e74705SXin Li   return vst2_p64(ptr, val);
391*67e74705SXin Li }
392*67e74705SXin Li 
393*67e74705SXin Li // CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #0 {
394*67e74705SXin Li // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
395*67e74705SXin Li // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
396*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
397*67e74705SXin Li // CHECK:   store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
398*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
399*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8*
400*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
401*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
402*67e74705SXin Li // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
403*67e74705SXin Li // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
404*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
405*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
406*67e74705SXin Li // CHECK:   [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
407*67e74705SXin Li // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL2]], i64 0, i64 1
408*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
409*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
410*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
411*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
412*67e74705SXin Li // CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
413*67e74705SXin Li // CHECK:   ret void
// Store a poly64x2x2_t (two 128-bit poly64 vectors) via the 2-element
// interleaving store; the CHECK lines above pin the struct coerce/memcpy
// lowering and the final llvm.aarch64.neon.st2.v2i64 call.
void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
  return vst2q_p64(ptr, val);
}
417*67e74705SXin Li 
418*67e74705SXin Li // CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #0 {
419*67e74705SXin Li // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
420*67e74705SXin Li // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
421*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
422*67e74705SXin Li // CHECK:   store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
423*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
424*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8*
425*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
426*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
427*67e74705SXin Li // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
428*67e74705SXin Li // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
429*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
430*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
431*67e74705SXin Li // CHECK:   [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
432*67e74705SXin Li // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL2]], i64 0, i64 1
433*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
434*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
435*67e74705SXin Li // CHECK:   [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
436*67e74705SXin Li // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL4]], i64 0, i64 2
437*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
438*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
439*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
440*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
441*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
442*67e74705SXin Li // CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
443*67e74705SXin Li // CHECK:   ret void
// Store a poly64x1x3_t (three 64-bit poly64 vectors) via the 3-element
// interleaving store; expected lowering is llvm.aarch64.neon.st3.v1i64,
// as checked above.
void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
  return vst3_p64(ptr, val);
}
447*67e74705SXin Li 
448*67e74705SXin Li // CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #0 {
449*67e74705SXin Li // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
450*67e74705SXin Li // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
451*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
452*67e74705SXin Li // CHECK:   store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
453*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
454*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8*
455*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
456*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
457*67e74705SXin Li // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
458*67e74705SXin Li // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
459*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
460*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
461*67e74705SXin Li // CHECK:   [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
462*67e74705SXin Li // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL2]], i64 0, i64 1
463*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
464*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
465*67e74705SXin Li // CHECK:   [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
466*67e74705SXin Li // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL4]], i64 0, i64 2
467*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
468*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
469*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
470*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
471*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
472*67e74705SXin Li // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
473*67e74705SXin Li // CHECK:   ret void
// Store a poly64x2x3_t (three 128-bit poly64 vectors) via the 3-element
// interleaving store; expected lowering is llvm.aarch64.neon.st3.v2i64,
// as checked above.
void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
  return vst3q_p64(ptr, val);
}
477*67e74705SXin Li 
478*67e74705SXin Li // CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #0 {
479*67e74705SXin Li // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
480*67e74705SXin Li // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
481*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
482*67e74705SXin Li // CHECK:   store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
483*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
484*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8*
485*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
486*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
487*67e74705SXin Li // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
488*67e74705SXin Li // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
489*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
490*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
491*67e74705SXin Li // CHECK:   [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
492*67e74705SXin Li // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL2]], i64 0, i64 1
493*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
494*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
495*67e74705SXin Li // CHECK:   [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
496*67e74705SXin Li // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL4]], i64 0, i64 2
497*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
498*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
499*67e74705SXin Li // CHECK:   [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
500*67e74705SXin Li // CHECK:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL6]], i64 0, i64 3
501*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX7]], align 8
502*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
503*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
504*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
505*67e74705SXin Li // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
506*67e74705SXin Li // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
507*67e74705SXin Li // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
508*67e74705SXin Li // CHECK:   ret void
// Store a poly64x1x4_t (four 64-bit poly64 vectors) via the 4-element
// interleaving store; expected lowering is llvm.aarch64.neon.st4.v1i64,
// as checked above.
void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
  return vst4_p64(ptr, val);
}
512*67e74705SXin Li 
513*67e74705SXin Li // CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #0 {
514*67e74705SXin Li // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
515*67e74705SXin Li // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
516*67e74705SXin Li // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
517*67e74705SXin Li // CHECK:   store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
518*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
519*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8*
520*67e74705SXin Li // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
521*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
522*67e74705SXin Li // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
523*67e74705SXin Li // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
524*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
525*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
526*67e74705SXin Li // CHECK:   [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
527*67e74705SXin Li // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL2]], i64 0, i64 1
528*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
529*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
530*67e74705SXin Li // CHECK:   [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
531*67e74705SXin Li // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL4]], i64 0, i64 2
532*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
533*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
534*67e74705SXin Li // CHECK:   [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
535*67e74705SXin Li // CHECK:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL6]], i64 0, i64 3
536*67e74705SXin Li // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX7]], align 16
537*67e74705SXin Li // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
538*67e74705SXin Li // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
539*67e74705SXin Li // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
540*67e74705SXin Li // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
541*67e74705SXin Li // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
542*67e74705SXin Li // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
543*67e74705SXin Li // CHECK:   ret void
// Store a poly64x2x4_t (four 128-bit poly64 vectors) via the 4-element
// interleaving store; expected lowering is llvm.aarch64.neon.st4.v2i64,
// as checked above.
void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
  return vst4q_p64(ptr, val);
}
547*67e74705SXin Li 
548*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vext_p64(<1 x i64> %a, <1 x i64> %b) #0 {
549*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
550*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
551*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
552*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
553*67e74705SXin Li // CHECK:   [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
554*67e74705SXin Li // CHECK:   ret <1 x i64> [[VEXT]]
// Extract from a pair of poly64x1 vectors at index 0 (a plain shufflevector
// in IR, as checked above).
// Fix: the test previously called vext_u64, the unsigned-int variant; it
// emitted identical IR, so the CHECK lines are unchanged, but it left the
// poly64 overload vext_p64 untested, which is the point of this test.
poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
  return vext_p64(a, b, 0);
}
559*67e74705SXin Li 
560*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
561*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
562*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
563*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
564*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
565*67e74705SXin Li // CHECK:   [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
566*67e74705SXin Li // CHECK:   ret <2 x i64> [[VEXT]]
// Extract starting at element 1 from a pair of poly64x2 vectors; lowers to
// the shufflevector <i32 1, i32 2> checked above.
poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
  return vextq_p64(a, b, 1);
}
570*67e74705SXin Li 
571*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
572*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
573*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Zip (interleave) low halves of two poly64x2 vectors; lowers to the
// shufflevector <i32 0, i32 2> checked above.
poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
  return vzip1q_p64(a, b);
}
577*67e74705SXin Li 
578*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
579*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
580*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Zip (interleave) high halves of two poly64x2 vectors; lowers to the
// shufflevector <i32 1, i32 3> checked above.
// Fix: the test previously called vzip2q_u64, the unsigned-int variant; the
// IR (and hence the CHECK lines) are identical, but the poly64 overload
// vzip2q_p64 — the intrinsic this test is named for — went untested.
poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
  return vzip2q_p64(a, b);
}
584*67e74705SXin Li 
585*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
586*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
587*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Unzip even elements of two poly64x2 vectors; lowers to the
// shufflevector <i32 0, i32 2> checked above.
poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
  return vuzp1q_p64(a, b);
}
591*67e74705SXin Li 
592*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
593*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
594*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Unzip odd elements of two poly64x2 vectors; lowers to the
// shufflevector <i32 1, i32 3> checked above.
// Fix: the test previously called vuzp2q_u64, the unsigned-int variant; the
// IR (and hence the CHECK lines) are identical, but the poly64 overload
// vuzp2q_p64 — the intrinsic this test is named for — went untested.
poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
  return vuzp2q_p64(a, b);
}
598*67e74705SXin Li 
599*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
600*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
601*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Transpose even lanes of two poly64x2 vectors; lowers to the
// shufflevector <i32 0, i32 2> checked above.
poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
  return vtrn1q_p64(a, b);
}
605*67e74705SXin Li 
606*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
607*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
608*67e74705SXin Li // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Transpose odd lanes of two poly64x2 vectors; lowers to the
// shufflevector <i32 1, i32 3> checked above.
// Fix: the test previously called vtrn2q_u64, the unsigned-int variant; the
// IR (and hence the CHECK lines) are identical, but the poly64 overload
// vtrn2q_p64 — the intrinsic this test is named for — went untested.
poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
  return vtrn2q_p64(a, b);
}
612*67e74705SXin Li 
613*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vsri_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
614*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
615*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
616*67e74705SXin Li // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
617*67e74705SXin Li // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
618*67e74705SXin Li // CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
619*67e74705SXin Li // CHECK:   ret <1 x i64> [[VSRI_N2]]
// Shift-right-and-insert on 64-bit poly64 vectors; shift 33 is inside the
// valid 1..64 range for 64-bit lanes and lowers to
// llvm.aarch64.neon.vsri.v1i64 with i32 33, as checked above.
poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsri_n_p64(a, b, 33);
}
623*67e74705SXin Li 
624*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
625*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
626*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
627*67e74705SXin Li // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
628*67e74705SXin Li // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
629*67e74705SXin Li // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
630*67e74705SXin Li // CHECK:   ret <2 x i64> [[VSRI_N2]]
// Shift-right-and-insert on 128-bit poly64 vectors; shift 64 is the valid
// upper bound for 64-bit lanes and lowers to llvm.aarch64.neon.vsri.v2i64
// with i32 64, as checked above.
poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsriq_n_p64(a, b, 64);
}
634*67e74705SXin Li 
635