xref: /aosp_15_r20/external/clang/test/CodeGen/aarch64-neon-scalar-copy.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2*67e74705SXin Li // RUN:  -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
3*67e74705SXin Li 
4*67e74705SXin Li 
5*67e74705SXin Li #include <arm_neon.h>
6*67e74705SXin Li 
7*67e74705SXin Li // CHECK-LABEL: define float @test_vdups_lane_f32(<2 x float> %a) #0 {
8*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
9*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
10*67e74705SXin Li // CHECK:   [[VDUPS_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
11*67e74705SXin Li // CHECK:   ret float [[VDUPS_LANE]]
// Codegen fixture: calls vdups_lane_f32 on a float32x2_t with constant lane 1.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 1 from <2 x float>, returned as float.
test_vdups_lane_f32(float32x2_t a)12*67e74705SXin Li float32_t test_vdups_lane_f32(float32x2_t a) {
13*67e74705SXin Li   return vdups_lane_f32(a, 1);
14*67e74705SXin Li }
15*67e74705SXin Li 
16*67e74705SXin Li 
17*67e74705SXin Li // CHECK-LABEL: define double @test_vdupd_lane_f64(<1 x double> %a) #0 {
18*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
19*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
20*67e74705SXin Li // CHECK:   [[VDUPD_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
21*67e74705SXin Li // CHECK:   ret double [[VDUPD_LANE]]
// Codegen fixture: calls vdupd_lane_f64 on a float64x1_t with constant lane 0.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 0 from <1 x double>, returned as double.
test_vdupd_lane_f64(float64x1_t a)22*67e74705SXin Li float64_t test_vdupd_lane_f64(float64x1_t a) {
23*67e74705SXin Li   return vdupd_lane_f64(a, 0);
24*67e74705SXin Li }
25*67e74705SXin Li 
26*67e74705SXin Li 
27*67e74705SXin Li // CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #0 {
28*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
29*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
30*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
31*67e74705SXin Li // CHECK:   ret float [[VGETQ_LANE]]
// Codegen fixture: calls vdups_laneq_f32 on a float32x4_t with constant lane 3.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 3 from <4 x float>, returned as float.
test_vdups_laneq_f32(float32x4_t a)32*67e74705SXin Li float32_t test_vdups_laneq_f32(float32x4_t a) {
33*67e74705SXin Li   return vdups_laneq_f32(a, 3);
34*67e74705SXin Li }
35*67e74705SXin Li 
36*67e74705SXin Li 
37*67e74705SXin Li // CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #0 {
38*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
39*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
40*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
41*67e74705SXin Li // CHECK:   ret double [[VGETQ_LANE]]
// Codegen fixture: calls vdupd_laneq_f64 on a float64x2_t with constant lane 1.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 1 from <2 x double>, returned as double.
test_vdupd_laneq_f64(float64x2_t a)42*67e74705SXin Li float64_t test_vdupd_laneq_f64(float64x2_t a) {
43*67e74705SXin Li   return vdupd_laneq_f64(a, 1);
44*67e74705SXin Li }
45*67e74705SXin Li 
46*67e74705SXin Li 
47*67e74705SXin Li // CHECK-LABEL: define i8 @test_vdupb_lane_s8(<8 x i8> %a) #0 {
48*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
49*67e74705SXin Li // CHECK:   ret i8 [[VGET_LANE]]
// Codegen fixture: calls vdupb_lane_s8 on an int8x8_t with constant lane 7.
// The CHECK lines above expect an extractelement of element 7 applied
// directly to the <8 x i8> argument %a, returned as i8.
test_vdupb_lane_s8(int8x8_t a)50*67e74705SXin Li int8_t test_vdupb_lane_s8(int8x8_t a) {
51*67e74705SXin Li   return vdupb_lane_s8(a, 7);
52*67e74705SXin Li }
53*67e74705SXin Li 
54*67e74705SXin Li 
55*67e74705SXin Li // CHECK-LABEL: define i16 @test_vduph_lane_s16(<4 x i16> %a) #0 {
56*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
57*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
58*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
59*67e74705SXin Li // CHECK:   ret i16 [[VGET_LANE]]
// Codegen fixture: calls vduph_lane_s16 on an int16x4_t with constant lane 3.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 3 from <4 x i16>, returned as i16.
test_vduph_lane_s16(int16x4_t a)60*67e74705SXin Li int16_t test_vduph_lane_s16(int16x4_t a) {
61*67e74705SXin Li   return vduph_lane_s16(a, 3);
62*67e74705SXin Li }
63*67e74705SXin Li 
64*67e74705SXin Li 
65*67e74705SXin Li // CHECK-LABEL: define i32 @test_vdups_lane_s32(<2 x i32> %a) #0 {
66*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
67*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
68*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
69*67e74705SXin Li // CHECK:   ret i32 [[VGET_LANE]]
// Codegen fixture: calls vdups_lane_s32 on an int32x2_t with constant lane 1.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 1 from <2 x i32>, returned as i32.
test_vdups_lane_s32(int32x2_t a)70*67e74705SXin Li int32_t test_vdups_lane_s32(int32x2_t a) {
71*67e74705SXin Li   return vdups_lane_s32(a, 1);
72*67e74705SXin Li }
73*67e74705SXin Li 
74*67e74705SXin Li 
75*67e74705SXin Li // CHECK-LABEL: define i64 @test_vdupd_lane_s64(<1 x i64> %a) #0 {
76*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
77*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
78*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
79*67e74705SXin Li // CHECK:   ret i64 [[VGET_LANE]]
// Codegen fixture: calls vdupd_lane_s64 on an int64x1_t with constant lane 0.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 0 from <1 x i64>, returned as i64.
test_vdupd_lane_s64(int64x1_t a)80*67e74705SXin Li int64_t test_vdupd_lane_s64(int64x1_t a) {
81*67e74705SXin Li   return vdupd_lane_s64(a, 0);
82*67e74705SXin Li }
83*67e74705SXin Li 
84*67e74705SXin Li 
85*67e74705SXin Li // CHECK-LABEL: define i8 @test_vdupb_lane_u8(<8 x i8> %a) #0 {
86*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
87*67e74705SXin Li // CHECK:   ret i8 [[VGET_LANE]]
// Codegen fixture: calls vdupb_lane_u8 on a uint8x8_t with constant lane 7.
// The CHECK lines above expect an extractelement of element 7 applied
// directly to the <8 x i8> argument %a, returned as i8.
test_vdupb_lane_u8(uint8x8_t a)88*67e74705SXin Li uint8_t test_vdupb_lane_u8(uint8x8_t a) {
89*67e74705SXin Li   return vdupb_lane_u8(a, 7);
90*67e74705SXin Li }
91*67e74705SXin Li 
92*67e74705SXin Li 
93*67e74705SXin Li // CHECK-LABEL: define i16 @test_vduph_lane_u16(<4 x i16> %a) #0 {
94*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
95*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
96*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
97*67e74705SXin Li // CHECK:   ret i16 [[VGET_LANE]]
// Codegen fixture: calls vduph_lane_u16 on a uint16x4_t with constant lane 3.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 3 from <4 x i16>, returned as i16.
test_vduph_lane_u16(uint16x4_t a)98*67e74705SXin Li uint16_t test_vduph_lane_u16(uint16x4_t a) {
99*67e74705SXin Li   return vduph_lane_u16(a, 3);
100*67e74705SXin Li }
101*67e74705SXin Li 
102*67e74705SXin Li 
103*67e74705SXin Li // CHECK-LABEL: define i32 @test_vdups_lane_u32(<2 x i32> %a) #0 {
104*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
105*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
106*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
107*67e74705SXin Li // CHECK:   ret i32 [[VGET_LANE]]
// Codegen fixture: calls vdups_lane_u32 on a uint32x2_t with constant lane 1.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 1 from <2 x i32>, returned as i32.
test_vdups_lane_u32(uint32x2_t a)108*67e74705SXin Li uint32_t test_vdups_lane_u32(uint32x2_t a) {
109*67e74705SXin Li   return vdups_lane_u32(a, 1);
110*67e74705SXin Li }
111*67e74705SXin Li 
112*67e74705SXin Li 
113*67e74705SXin Li // CHECK-LABEL: define i64 @test_vdupd_lane_u64(<1 x i64> %a) #0 {
114*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
115*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
116*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
117*67e74705SXin Li // CHECK:   ret i64 [[VGET_LANE]]
// Codegen fixture: calls vdupd_lane_u64 on a uint64x1_t with constant lane 0.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 0 from <1 x i64>, returned as i64.
test_vdupd_lane_u64(uint64x1_t a)118*67e74705SXin Li uint64_t test_vdupd_lane_u64(uint64x1_t a) {
119*67e74705SXin Li   return vdupd_lane_u64(a, 0);
120*67e74705SXin Li }
121*67e74705SXin Li 
122*67e74705SXin Li // CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #0 {
123*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
124*67e74705SXin Li // CHECK:   ret i8 [[VGETQ_LANE]]
// Codegen fixture: calls vdupb_laneq_s8 on an int8x16_t with constant lane 15.
// The CHECK lines above expect an extractelement of element 15 applied
// directly to the <16 x i8> argument %a, returned as i8.
test_vdupb_laneq_s8(int8x16_t a)125*67e74705SXin Li int8_t test_vdupb_laneq_s8(int8x16_t a) {
126*67e74705SXin Li   return vdupb_laneq_s8(a, 15);
127*67e74705SXin Li }
128*67e74705SXin Li 
129*67e74705SXin Li 
130*67e74705SXin Li // CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #0 {
131*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
132*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
133*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
134*67e74705SXin Li // CHECK:   ret i16 [[VGETQ_LANE]]
// Codegen fixture: calls vduph_laneq_s16 on an int16x8_t with constant lane 7.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 7 from <8 x i16>, returned as i16.
test_vduph_laneq_s16(int16x8_t a)135*67e74705SXin Li int16_t test_vduph_laneq_s16(int16x8_t a) {
136*67e74705SXin Li   return vduph_laneq_s16(a, 7);
137*67e74705SXin Li }
138*67e74705SXin Li 
139*67e74705SXin Li 
140*67e74705SXin Li // CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #0 {
141*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
142*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
143*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
144*67e74705SXin Li // CHECK:   ret i32 [[VGETQ_LANE]]
// Codegen fixture: calls vdups_laneq_s32 on an int32x4_t with constant lane 3.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 3 from <4 x i32>, returned as i32.
test_vdups_laneq_s32(int32x4_t a)145*67e74705SXin Li int32_t test_vdups_laneq_s32(int32x4_t a) {
146*67e74705SXin Li   return vdups_laneq_s32(a, 3);
147*67e74705SXin Li }
148*67e74705SXin Li 
149*67e74705SXin Li 
150*67e74705SXin Li // CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #0 {
151*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
152*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
153*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
154*67e74705SXin Li // CHECK:   ret i64 [[VGETQ_LANE]]
// Codegen fixture: calls vdupd_laneq_s64 on an int64x2_t with constant lane 1.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 1 from <2 x i64>, returned as i64.
test_vdupd_laneq_s64(int64x2_t a)155*67e74705SXin Li int64_t test_vdupd_laneq_s64(int64x2_t a) {
156*67e74705SXin Li   return vdupd_laneq_s64(a, 1);
157*67e74705SXin Li }
158*67e74705SXin Li 
159*67e74705SXin Li 
160*67e74705SXin Li // CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #0 {
161*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
162*67e74705SXin Li // CHECK:   ret i8 [[VGETQ_LANE]]
// Codegen fixture: calls vdupb_laneq_u8 on a uint8x16_t with constant lane 15.
// The CHECK lines above expect an extractelement of element 15 applied
// directly to the <16 x i8> argument %a, returned as i8.
test_vdupb_laneq_u8(uint8x16_t a)163*67e74705SXin Li uint8_t test_vdupb_laneq_u8(uint8x16_t a) {
164*67e74705SXin Li   return vdupb_laneq_u8(a, 15);
165*67e74705SXin Li }
166*67e74705SXin Li 
167*67e74705SXin Li 
168*67e74705SXin Li // CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #0 {
169*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
170*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
171*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
172*67e74705SXin Li // CHECK:   ret i16 [[VGETQ_LANE]]
// Codegen fixture: calls vduph_laneq_u16 on a uint16x8_t with constant lane 7.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 7 from <8 x i16>, returned as i16.
test_vduph_laneq_u16(uint16x8_t a)173*67e74705SXin Li uint16_t test_vduph_laneq_u16(uint16x8_t a) {
174*67e74705SXin Li   return vduph_laneq_u16(a, 7);
175*67e74705SXin Li }
176*67e74705SXin Li 
177*67e74705SXin Li 
178*67e74705SXin Li // CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #0 {
179*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
180*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
181*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
182*67e74705SXin Li // CHECK:   ret i32 [[VGETQ_LANE]]
// Codegen fixture: calls vdups_laneq_u32 on a uint32x4_t with constant lane 3.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 3 from <4 x i32>, returned as i32.
test_vdups_laneq_u32(uint32x4_t a)183*67e74705SXin Li uint32_t test_vdups_laneq_u32(uint32x4_t a) {
184*67e74705SXin Li   return vdups_laneq_u32(a, 3);
185*67e74705SXin Li }
186*67e74705SXin Li 
187*67e74705SXin Li 
188*67e74705SXin Li // CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #0 {
189*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
190*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
191*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
192*67e74705SXin Li // CHECK:   ret i64 [[VGETQ_LANE]]
// Codegen fixture: calls vdupd_laneq_u64 on a uint64x2_t with constant lane 1.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 1 from <2 x i64>, returned as i64.
test_vdupd_laneq_u64(uint64x2_t a)193*67e74705SXin Li uint64_t test_vdupd_laneq_u64(uint64x2_t a) {
194*67e74705SXin Li   return vdupd_laneq_u64(a, 1);
195*67e74705SXin Li }
196*67e74705SXin Li 
197*67e74705SXin Li // CHECK-LABEL: define i8 @test_vdupb_lane_p8(<8 x i8> %a) #0 {
198*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
199*67e74705SXin Li // CHECK:   ret i8 [[VGET_LANE]]
// Codegen fixture: calls vdupb_lane_p8 on a poly8x8_t with constant lane 7.
// The CHECK lines above expect an extractelement of element 7 applied
// directly to the <8 x i8> argument %a, returned as i8.
test_vdupb_lane_p8(poly8x8_t a)200*67e74705SXin Li poly8_t test_vdupb_lane_p8(poly8x8_t a) {
201*67e74705SXin Li   return vdupb_lane_p8(a, 7);
202*67e74705SXin Li }
203*67e74705SXin Li 
204*67e74705SXin Li // CHECK-LABEL: define i16 @test_vduph_lane_p16(<4 x i16> %a) #0 {
205*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
206*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
207*67e74705SXin Li // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
208*67e74705SXin Li // CHECK:   ret i16 [[VGET_LANE]]
// Codegen fixture: calls vduph_lane_p16 on a poly16x4_t with constant lane 3.
// The CHECK lines above expect a bitcast round-trip through <8 x i8> followed
// by an extractelement of element 3 from <4 x i16>, returned as i16.
test_vduph_lane_p16(poly16x4_t a)209*67e74705SXin Li poly16_t test_vduph_lane_p16(poly16x4_t a) {
210*67e74705SXin Li   return vduph_lane_p16(a, 3);
211*67e74705SXin Li }
212*67e74705SXin Li 
213*67e74705SXin Li // CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #0 {
214*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
215*67e74705SXin Li // CHECK:   ret i8 [[VGETQ_LANE]]
// Codegen fixture: calls vdupb_laneq_p8 on a poly8x16_t with constant lane 15.
// The CHECK lines above expect an extractelement of element 15 applied
// directly to the <16 x i8> argument %a, returned as i8.
test_vdupb_laneq_p8(poly8x16_t a)216*67e74705SXin Li poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
217*67e74705SXin Li   return vdupb_laneq_p8(a, 15);
218*67e74705SXin Li }
219*67e74705SXin Li 
220*67e74705SXin Li // CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #0 {
221*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
222*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
223*67e74705SXin Li // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
224*67e74705SXin Li // CHECK:   ret i16 [[VGETQ_LANE]]
// Codegen fixture: calls vduph_laneq_p16 on a poly16x8_t with constant lane 7.
// The CHECK lines above expect a bitcast round-trip through <16 x i8> followed
// by an extractelement of element 7 from <8 x i16>, returned as i16.
test_vduph_laneq_p16(poly16x8_t a)225*67e74705SXin Li poly16_t test_vduph_laneq_p16(poly16x8_t a) {
226*67e74705SXin Li   return vduph_laneq_p16(a, 7);
227*67e74705SXin Li }
228*67e74705SXin Li 
229