// xref: /aosp_15_r20/external/clang/test/CodeGen/aarch64-neon-vget.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
// RUN:   -fallow-half-arguments-and-returns -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>

7*67e74705SXin Li // CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
8*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
9*67e74705SXin Li // CHECK:   ret i8 [[VGET_LANE]]
test_vget_lane_u8(uint8x8_t a)10*67e74705SXin Li uint8_t test_vget_lane_u8(uint8x8_t a) {
11*67e74705SXin Li   return vget_lane_u8(a, 7);
12*67e74705SXin Li }
13*67e74705SXin Li 
14*67e74705SXin Li // CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
15*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
16*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
17*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
18*67e74705SXin Li // CHECK:   ret i16 [[VGET_LANE]]
test_vget_lane_u16(uint16x4_t a)19*67e74705SXin Li uint16_t test_vget_lane_u16(uint16x4_t a) {
20*67e74705SXin Li   return vget_lane_u16(a, 3);
21*67e74705SXin Li }
22*67e74705SXin Li 
23*67e74705SXin Li // CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
24*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
25*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
26*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
27*67e74705SXin Li // CHECK:   ret i32 [[VGET_LANE]]
test_vget_lane_u32(uint32x2_t a)28*67e74705SXin Li uint32_t test_vget_lane_u32(uint32x2_t a) {
29*67e74705SXin Li   return vget_lane_u32(a, 1);
30*67e74705SXin Li }
31*67e74705SXin Li 
32*67e74705SXin Li // CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
33*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
34*67e74705SXin Li // CHECK:   ret i8 [[VGET_LANE]]
test_vget_lane_s8(int8x8_t a)35*67e74705SXin Li int8_t test_vget_lane_s8(int8x8_t a) {
36*67e74705SXin Li   return vget_lane_s8(a, 7);
37*67e74705SXin Li }
38*67e74705SXin Li 
39*67e74705SXin Li // CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
40*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
41*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
42*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
43*67e74705SXin Li // CHECK:   ret i16 [[VGET_LANE]]
test_vget_lane_s16(int16x4_t a)44*67e74705SXin Li int16_t test_vget_lane_s16(int16x4_t a) {
45*67e74705SXin Li   return vget_lane_s16(a, 3);
46*67e74705SXin Li }
47*67e74705SXin Li 
48*67e74705SXin Li // CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
49*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
50*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
51*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
52*67e74705SXin Li // CHECK:   ret i32 [[VGET_LANE]]
test_vget_lane_s32(int32x2_t a)53*67e74705SXin Li int32_t test_vget_lane_s32(int32x2_t a) {
54*67e74705SXin Li   return vget_lane_s32(a, 1);
55*67e74705SXin Li }
56*67e74705SXin Li 
57*67e74705SXin Li // CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
58*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
59*67e74705SXin Li // CHECK:   ret i8 [[VGET_LANE]]
test_vget_lane_p8(poly8x8_t a)60*67e74705SXin Li poly8_t test_vget_lane_p8(poly8x8_t a) {
61*67e74705SXin Li   return vget_lane_p8(a, 7);
62*67e74705SXin Li }
63*67e74705SXin Li 
64*67e74705SXin Li // CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
65*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
66*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
67*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
68*67e74705SXin Li // CHECK:   ret i16 [[VGET_LANE]]
test_vget_lane_p16(poly16x4_t a)69*67e74705SXin Li poly16_t test_vget_lane_p16(poly16x4_t a) {
70*67e74705SXin Li   return vget_lane_p16(a, 3);
71*67e74705SXin Li }
72*67e74705SXin Li 
73*67e74705SXin Li // CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
74*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
75*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
76*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
77*67e74705SXin Li // CHECK:   ret float [[VGET_LANE]]
test_vget_lane_f32(float32x2_t a)78*67e74705SXin Li float32_t test_vget_lane_f32(float32x2_t a) {
79*67e74705SXin Li   return vget_lane_f32(a, 1);
80*67e74705SXin Li }
81*67e74705SXin Li 
82*67e74705SXin Li // CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
83*67e74705SXin Li // CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
84*67e74705SXin Li // CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
85*67e74705SXin Li // CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
86*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
87*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
88*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
89*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
90*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
91*67e74705SXin Li // CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
92*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
93*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
94*67e74705SXin Li // CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
95*67e74705SXin Li // CHECK:   ret float [[CONV]]
test_vget_lane_f16(float16x4_t a)96*67e74705SXin Li float32_t test_vget_lane_f16(float16x4_t a) {
97*67e74705SXin Li   return vget_lane_f16(a, 1);
98*67e74705SXin Li }
99*67e74705SXin Li 
100*67e74705SXin Li // CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
101*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
102*67e74705SXin Li // CHECK:   ret i8 [[VGETQ_LANE]]
test_vgetq_lane_u8(uint8x16_t a)103*67e74705SXin Li uint8_t test_vgetq_lane_u8(uint8x16_t a) {
104*67e74705SXin Li   return vgetq_lane_u8(a, 15);
105*67e74705SXin Li }
106*67e74705SXin Li 
107*67e74705SXin Li // CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
108*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
109*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
110*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
111*67e74705SXin Li // CHECK:   ret i16 [[VGETQ_LANE]]
test_vgetq_lane_u16(uint16x8_t a)112*67e74705SXin Li uint16_t test_vgetq_lane_u16(uint16x8_t a) {
113*67e74705SXin Li   return vgetq_lane_u16(a, 7);
114*67e74705SXin Li }
115*67e74705SXin Li 
116*67e74705SXin Li // CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
117*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
118*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
119*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
120*67e74705SXin Li // CHECK:   ret i32 [[VGETQ_LANE]]
test_vgetq_lane_u32(uint32x4_t a)121*67e74705SXin Li uint32_t test_vgetq_lane_u32(uint32x4_t a) {
122*67e74705SXin Li   return vgetq_lane_u32(a, 3);
123*67e74705SXin Li }
124*67e74705SXin Li 
125*67e74705SXin Li // CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
126*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
127*67e74705SXin Li // CHECK:   ret i8 [[VGETQ_LANE]]
test_vgetq_lane_s8(int8x16_t a)128*67e74705SXin Li int8_t test_vgetq_lane_s8(int8x16_t a) {
129*67e74705SXin Li   return vgetq_lane_s8(a, 15);
130*67e74705SXin Li }
131*67e74705SXin Li 
132*67e74705SXin Li // CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
133*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
134*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
135*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
136*67e74705SXin Li // CHECK:   ret i16 [[VGETQ_LANE]]
test_vgetq_lane_s16(int16x8_t a)137*67e74705SXin Li int16_t test_vgetq_lane_s16(int16x8_t a) {
138*67e74705SXin Li   return vgetq_lane_s16(a, 7);
139*67e74705SXin Li }
140*67e74705SXin Li 
141*67e74705SXin Li // CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
142*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
143*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
144*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
145*67e74705SXin Li // CHECK:   ret i32 [[VGETQ_LANE]]
test_vgetq_lane_s32(int32x4_t a)146*67e74705SXin Li int32_t test_vgetq_lane_s32(int32x4_t a) {
147*67e74705SXin Li   return vgetq_lane_s32(a, 3);
148*67e74705SXin Li }
149*67e74705SXin Li 
150*67e74705SXin Li // CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
151*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
152*67e74705SXin Li // CHECK:   ret i8 [[VGETQ_LANE]]
test_vgetq_lane_p8(poly8x16_t a)153*67e74705SXin Li poly8_t test_vgetq_lane_p8(poly8x16_t a) {
154*67e74705SXin Li   return vgetq_lane_p8(a, 15);
155*67e74705SXin Li }
156*67e74705SXin Li 
157*67e74705SXin Li // CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
158*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
159*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
160*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
161*67e74705SXin Li // CHECK:   ret i16 [[VGETQ_LANE]]
test_vgetq_lane_p16(poly16x8_t a)162*67e74705SXin Li poly16_t test_vgetq_lane_p16(poly16x8_t a) {
163*67e74705SXin Li   return vgetq_lane_p16(a, 7);
164*67e74705SXin Li }
165*67e74705SXin Li 
166*67e74705SXin Li // CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
167*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
168*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
169*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
170*67e74705SXin Li // CHECK:   ret float [[VGETQ_LANE]]
test_vgetq_lane_f32(float32x4_t a)171*67e74705SXin Li float32_t test_vgetq_lane_f32(float32x4_t a) {
172*67e74705SXin Li   return vgetq_lane_f32(a, 3);
173*67e74705SXin Li }
174*67e74705SXin Li 
175*67e74705SXin Li // CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
176*67e74705SXin Li // CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
177*67e74705SXin Li // CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
178*67e74705SXin Li // CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
179*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
180*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
181*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
182*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
183*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
184*67e74705SXin Li // CHECK:   store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
185*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
186*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
187*67e74705SXin Li // CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
188*67e74705SXin Li // CHECK:   ret float [[CONV]]
test_vgetq_lane_f16(float16x8_t a)189*67e74705SXin Li float32_t test_vgetq_lane_f16(float16x8_t a) {
190*67e74705SXin Li   return vgetq_lane_f16(a, 3);
191*67e74705SXin Li }
192*67e74705SXin Li 
193*67e74705SXin Li // CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
194*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
195*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
196*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
197*67e74705SXin Li // CHECK:   ret i64 [[VGET_LANE]]
test_vget_lane_s64(int64x1_t a)198*67e74705SXin Li int64_t test_vget_lane_s64(int64x1_t a) {
199*67e74705SXin Li   return vget_lane_s64(a, 0);
200*67e74705SXin Li }
201*67e74705SXin Li 
202*67e74705SXin Li // CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
203*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
204*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
205*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
206*67e74705SXin Li // CHECK:   ret i64 [[VGET_LANE]]
test_vget_lane_u64(uint64x1_t a)207*67e74705SXin Li uint64_t test_vget_lane_u64(uint64x1_t a) {
208*67e74705SXin Li   return vget_lane_u64(a, 0);
209*67e74705SXin Li }
210*67e74705SXin Li 
211*67e74705SXin Li // CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
212*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
213*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
214*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
215*67e74705SXin Li // CHECK:   ret i64 [[VGETQ_LANE]]
test_vgetq_lane_s64(int64x2_t a)216*67e74705SXin Li int64_t test_vgetq_lane_s64(int64x2_t a) {
217*67e74705SXin Li   return vgetq_lane_s64(a, 1);
218*67e74705SXin Li }
219*67e74705SXin Li 
220*67e74705SXin Li // CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
221*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
222*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
223*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
224*67e74705SXin Li // CHECK:   ret i64 [[VGETQ_LANE]]
test_vgetq_lane_u64(uint64x2_t a)225*67e74705SXin Li uint64_t test_vgetq_lane_u64(uint64x2_t a) {
226*67e74705SXin Li   return vgetq_lane_u64(a, 1);
227*67e74705SXin Li }
228*67e74705SXin Li 
229*67e74705SXin Li 
230*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
231*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
232*67e74705SXin Li // CHECK:   ret <8 x i8> [[VSET_LANE]]
test_vset_lane_u8(uint8_t a,uint8x8_t b)233*67e74705SXin Li uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
234*67e74705SXin Li   return vset_lane_u8(a, b, 7);
235*67e74705SXin Li }
236*67e74705SXin Li 
237*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
238*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
239*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
240*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
241*67e74705SXin Li // CHECK:   ret <4 x i16> [[VSET_LANE]]
test_vset_lane_u16(uint16_t a,uint16x4_t b)242*67e74705SXin Li uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
243*67e74705SXin Li   return vset_lane_u16(a, b, 3);
244*67e74705SXin Li }
245*67e74705SXin Li 
246*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
247*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
248*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
249*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
250*67e74705SXin Li // CHECK:   ret <2 x i32> [[VSET_LANE]]
test_vset_lane_u32(uint32_t a,uint32x2_t b)251*67e74705SXin Li uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
252*67e74705SXin Li   return vset_lane_u32(a, b, 1);
253*67e74705SXin Li }
254*67e74705SXin Li 
255*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
256*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
257*67e74705SXin Li // CHECK:   ret <8 x i8> [[VSET_LANE]]
test_vset_lane_s8(int8_t a,int8x8_t b)258*67e74705SXin Li int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
259*67e74705SXin Li   return vset_lane_s8(a, b, 7);
260*67e74705SXin Li }
261*67e74705SXin Li 
262*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
263*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
264*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
265*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
266*67e74705SXin Li // CHECK:   ret <4 x i16> [[VSET_LANE]]
test_vset_lane_s16(int16_t a,int16x4_t b)267*67e74705SXin Li int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
268*67e74705SXin Li   return vset_lane_s16(a, b, 3);
269*67e74705SXin Li }
270*67e74705SXin Li 
271*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
272*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
273*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
274*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
275*67e74705SXin Li // CHECK:   ret <2 x i32> [[VSET_LANE]]
test_vset_lane_s32(int32_t a,int32x2_t b)276*67e74705SXin Li int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
277*67e74705SXin Li   return vset_lane_s32(a, b, 1);
278*67e74705SXin Li }
279*67e74705SXin Li 
280*67e74705SXin Li // CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
281*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
282*67e74705SXin Li // CHECK:   ret <8 x i8> [[VSET_LANE]]
test_vset_lane_p8(poly8_t a,poly8x8_t b)283*67e74705SXin Li poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
284*67e74705SXin Li   return vset_lane_p8(a, b, 7);
285*67e74705SXin Li }
286*67e74705SXin Li 
287*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
288*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
289*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
290*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
291*67e74705SXin Li // CHECK:   ret <4 x i16> [[VSET_LANE]]
test_vset_lane_p16(poly16_t a,poly16x4_t b)292*67e74705SXin Li poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
293*67e74705SXin Li   return vset_lane_p16(a, b, 3);
294*67e74705SXin Li }
295*67e74705SXin Li 
296*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
297*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
298*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
299*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
300*67e74705SXin Li // CHECK:   ret <2 x float> [[VSET_LANE]]
test_vset_lane_f32(float32_t a,float32x2_t b)301*67e74705SXin Li float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
302*67e74705SXin Li   return vset_lane_f32(a, b, 1);
303*67e74705SXin Li }
304*67e74705SXin Li 
305*67e74705SXin Li // CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
306*67e74705SXin Li // CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
307*67e74705SXin Li // CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
308*67e74705SXin Li // CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
309*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
310*67e74705SXin Li // CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
311*67e74705SXin Li // CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
312*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
313*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
314*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
315*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
316*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
317*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
318*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
319*67e74705SXin Li // CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
320*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
321*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
322*67e74705SXin Li // CHECK:   ret <4 x half> [[TMP8]]
test_vset_lane_f16(float16_t * a,float16x4_t b)323*67e74705SXin Li float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
324*67e74705SXin Li   return vset_lane_f16(*a, b, 3);
325*67e74705SXin Li }
326*67e74705SXin Li 
327*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
328*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
329*67e74705SXin Li // CHECK:   ret <16 x i8> [[VSET_LANE]]
test_vsetq_lane_u8(uint8_t a,uint8x16_t b)330*67e74705SXin Li uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
331*67e74705SXin Li   return vsetq_lane_u8(a, b, 15);
332*67e74705SXin Li }
333*67e74705SXin Li 
334*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
335*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
336*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
337*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
338*67e74705SXin Li // CHECK:   ret <8 x i16> [[VSET_LANE]]
test_vsetq_lane_u16(uint16_t a,uint16x8_t b)339*67e74705SXin Li uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
340*67e74705SXin Li   return vsetq_lane_u16(a, b, 7);
341*67e74705SXin Li }
342*67e74705SXin Li 
343*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
344*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
345*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
346*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
347*67e74705SXin Li // CHECK:   ret <4 x i32> [[VSET_LANE]]
test_vsetq_lane_u32(uint32_t a,uint32x4_t b)348*67e74705SXin Li uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
349*67e74705SXin Li   return vsetq_lane_u32(a, b, 3);
350*67e74705SXin Li }
351*67e74705SXin Li 
352*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
353*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
354*67e74705SXin Li // CHECK:   ret <16 x i8> [[VSET_LANE]]
test_vsetq_lane_s8(int8_t a,int8x16_t b)355*67e74705SXin Li int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
356*67e74705SXin Li   return vsetq_lane_s8(a, b, 15);
357*67e74705SXin Li }
358*67e74705SXin Li 
359*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
360*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
361*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
362*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
363*67e74705SXin Li // CHECK:   ret <8 x i16> [[VSET_LANE]]
test_vsetq_lane_s16(int16_t a,int16x8_t b)364*67e74705SXin Li int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
365*67e74705SXin Li   return vsetq_lane_s16(a, b, 7);
366*67e74705SXin Li }
367*67e74705SXin Li 
368*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
369*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
370*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
371*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
372*67e74705SXin Li // CHECK:   ret <4 x i32> [[VSET_LANE]]
test_vsetq_lane_s32(int32_t a,int32x4_t b)373*67e74705SXin Li int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
374*67e74705SXin Li   return vsetq_lane_s32(a, b, 3);
375*67e74705SXin Li }
376*67e74705SXin Li 
377*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
378*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
379*67e74705SXin Li // CHECK:   ret <16 x i8> [[VSET_LANE]]
test_vsetq_lane_p8(poly8_t a,poly8x16_t b)380*67e74705SXin Li poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
381*67e74705SXin Li   return vsetq_lane_p8(a, b, 15);
382*67e74705SXin Li }
383*67e74705SXin Li 
384*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
385*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
386*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
387*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
388*67e74705SXin Li // CHECK:   ret <8 x i16> [[VSET_LANE]]
test_vsetq_lane_p16(poly16_t a,poly16x8_t b)389*67e74705SXin Li poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
390*67e74705SXin Li   return vsetq_lane_p16(a, b, 7);
391*67e74705SXin Li }
392*67e74705SXin Li 
393*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
394*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
395*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
396*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
397*67e74705SXin Li // CHECK:   ret <4 x float> [[VSET_LANE]]
test_vsetq_lane_f32(float32_t a,float32x4_t b)398*67e74705SXin Li float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
399*67e74705SXin Li   return vsetq_lane_f32(a, b, 3);
400*67e74705SXin Li }
401*67e74705SXin Li 
402*67e74705SXin Li // CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
403*67e74705SXin Li // CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
404*67e74705SXin Li // CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
405*67e74705SXin Li // CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
406*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
407*67e74705SXin Li // CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
408*67e74705SXin Li // CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
409*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
410*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
411*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
412*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
413*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
414*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
415*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
416*67e74705SXin Li // CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
417*67e74705SXin Li // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
418*67e74705SXin Li // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
419*67e74705SXin Li // CHECK:   ret <8 x half> [[TMP8]]
test_vsetq_lane_f16(float16_t * a,float16x8_t b)420*67e74705SXin Li float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
421*67e74705SXin Li   return vsetq_lane_f16(*a, b, 7);
422*67e74705SXin Li }
423*67e74705SXin Li 
424*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
425*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
426*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
427*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
428*67e74705SXin Li // CHECK:   ret <1 x i64> [[VSET_LANE]]
test_vset_lane_s64(int64_t a,int64x1_t b)429*67e74705SXin Li int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
430*67e74705SXin Li   return vset_lane_s64(a, b, 0);
431*67e74705SXin Li }
432*67e74705SXin Li 
433*67e74705SXin Li // CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
434*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
435*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
436*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
437*67e74705SXin Li // CHECK:   ret <1 x i64> [[VSET_LANE]]
test_vset_lane_u64(uint64_t a,uint64x1_t b)438*67e74705SXin Li uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
439*67e74705SXin Li   return vset_lane_u64(a, b, 0);
440*67e74705SXin Li }
441*67e74705SXin Li 
442*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
443*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
444*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
445*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
446*67e74705SXin Li // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_s64(int64_t a,int64x2_t b)447*67e74705SXin Li int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
448*67e74705SXin Li   return vsetq_lane_s64(a, b, 1);
449*67e74705SXin Li }
450*67e74705SXin Li 
451*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
452*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
453*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
454*67e74705SXin Li // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
455*67e74705SXin Li // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_u64(uint64_t a,uint64x2_t b)456*67e74705SXin Li uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
457*67e74705SXin Li   return vsetq_lane_u64(a, b, 1);
458*67e74705SXin Li }
459