// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu \
// RUN:   -target-cpu swift -fallow-half-arguments-and-returns -ffreestanding -emit-llvm -o - %s \
// RUN:   | opt -S -mem2reg | FileCheck %s

// REQUIRES: long-tests

#include <arm_neon.h>

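// vaba[q]: vector absolute difference and accumulate, a + |b - c| per lane.
// As the CHECK lines below verify, Clang lowers it to the signed/unsigned
// @llvm.arm.neon.vabd intrinsic followed by a plain vector add.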
// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vaba_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vaba_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vaba_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vaba_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vaba_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vaba_u32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vabaq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vabaq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vabaq_s32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vabaq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vabaq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vabaq_u32(a, b, c);
}

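// vabal: widening absolute difference and accumulate. The narrow |b - c|
// result is zero-extended to the double-width element type (it is
// non-negative, so zext is correct even for the signed variants) and then
// added to the wide accumulator a.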
// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

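// vabd[q]: vector absolute difference, |a - b| per lane. Non-i8 element
// types are round-tripped through <8 x i8>/<16 x i8> bitcasts (effectively
// no-ops), which is how Clang materializes NEON operands of other lane types.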
// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VABD_V_I]]
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
  return vabd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
  return vabd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
  return vabd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VABD_V_I]]
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
  return vabd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
  return vabd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
  return vabd_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> [[VABD_V_I]], <2 x float> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
  return vabd_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VABDQ_V_I]]
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
  return vabdq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
  return vabdq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
  return vabdq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VABDQ_V_I]]
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
  return vabdq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
  return vabdq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
  return vabdq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> [[VABDQ_V_I]], <4 x float> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
  return vabdq_f32(a, b);
}

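// vabdl: widening absolute difference. Same vabd lowering as above, with the
// narrow result zero-extended to the double-width vector.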
// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

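// vabs[q]: per-lane absolute value. Integer variants use @llvm.arm.neon.vabs;
// the float variants map onto the target-independent @llvm.fabs intrinsic.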
// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
  return vabs_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> [[VABS_I]]) #4
// CHECK: ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
  return vabs_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> [[VABS_I]]) #4
// CHECK: ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
  return vabs_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #4
// CHECK: ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
  return vabs_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
  return vabsq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> [[VABS_I]]) #4
// CHECK: ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
  return vabsq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> [[VABS_I]]) #4
// CHECK: ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
  return vabsq_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #4
// CHECK: ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
  return vabsq_f32(a);
}

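// vadd[q]: plain vector addition. No target intrinsic is needed: integer
// lanes lower to the IR add instruction, float lanes to fadd.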
// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
  return vadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
  return vadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
  return vadd_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
  return vadd_s64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
  return vadd_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
  return vadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
  return vadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
  return vadd_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
  return vadd_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
  return vaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
  return vaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
  return vaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
  return vaddq_s64(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
  return vaddq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vaddq_u64(a, b);
}

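// vaddhn: add and keep the high half of each lane. Lowered as a wide add, a
// logical shift right by half the lane width, and a truncate to the narrow
// element type.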
// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

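// vaddl: widening add. Both narrow operands are sign- or zero-extended to the
// double-width type before an ordinary add.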
// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

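// vaddw: wide add. Only the second (narrow) operand is extended; the first
// already has the double-width type.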
// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

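// vand[q]: bitwise AND, lowered directly to the IR and instruction.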
// CHECK-LABEL: define <8 x i8> @test_vand_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
  return vand_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vand_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
  return vand_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vand_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
  return vand_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vand_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
  return vand_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vand_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
  return vand_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vand_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  return vand_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vand_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
  return vand_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vand_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
  return vand_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
  return vandq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
  return vandq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
  return vandq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
  return vandq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
  return vandq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
  return vandq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
  return vandq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
  return vandq_u64(a, b);
}

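// vbic[q]: bit clear, a & ~b. Lowered as an xor with all-ones (the IR
// spelling of NOT) followed by an and.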
// CHECK-LABEL: define <8 x i8> @test_vbic_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
  return vbic_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vbic_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
  return vbic_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vbic_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
  return vbic_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vbic_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
  return vbic_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vbic_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
  return vbic_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vbic_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
  return vbic_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vbic_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
  return vbic_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vbic_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
  return vbic_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
  return vbicq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
  return vbicq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
  return vbicq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
  return vbicq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
  return vbicq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
  return vbicq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
  return vbicq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
  return vbicq_u64(a, b);
}

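// vbsl_*/vbslq_* perform a bitwise select, (a & b) | (~a & c), with the mask
// in the first operand. Every element type is routed through the byte-vector
// form of @llvm.arm.neon.vbsl, hence the bitcasts around the call.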
// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VBSL_V_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
  return vbsl_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
  return vbsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
  return vbsl_s32(a, b, c);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
  return vbsl_s64(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VBSL_V_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vbsl_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vbsl_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vbsl_u32(a, b, c);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
  return vbsl_u64(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x i32> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
  return vbsl_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VBSL_V_I]]
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
  return vbsl_p8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
  return vbsl_p16(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
  return vbslq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
  return vbslq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
  return vbslq_s32(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
  return vbslq_s64(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vbslq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vbslq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP3]]
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vbslq_u32(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP3]]
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  return vbslq_u64(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
  return vbslq_f32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
  return vbslq_p8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
  return vbslq_p16(a, b, c);
}

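// vcage_f32/vcageq_f32: absolute compare |a| >= |b|, via @llvm.arm.neon.vacge.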
// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
  return vcage_f32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
  return vcageq_f32(a, b);
}

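// vcagt_f32/vcagtq_f32: absolute compare |a| > |b|, via @llvm.arm.neon.vacgt.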
// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
  return vcagt_f32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
  return vcagtq_f32(a, b);
}

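// vcale_f32/vcaleq_f32: absolute compare |a| <= |b|. The CHECK lines show the
// operands swapped so that @llvm.arm.neon.vacge can be reused.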
// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
  return vcale_f32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
  return vcaleq_f32(a, b);
}

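// vcalt_f32/vcaltq_f32: absolute compare |a| < |b|, again with swapped
// operands feeding @llvm.arm.neon.vacgt.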
// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
  return vcalt_f32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
  return vcaltq_f32(a, b);
}

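// vceq_*/vceqq_*: lane-wise equality. These lower to generic icmp/fcmp plus a
// sign extension that widens each i1 into an all-ones or all-zeros lane.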
// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
  return vceq_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
  return vceq_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
  return vceq_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
  return vceq_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
  return vceq_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
  return vceq_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
  return vceq_u32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
  return vceq_p8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
  return vceqq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
  return vceqq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
  return vceqq_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
  return vceqq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
  return vceqq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
  return vceqq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
  return vceqq_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
  return vceqq_p8(a, b);
}

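// vcge_*/vcgeq_*: greater than or equal, using sge, uge, or oge depending on
// the element type.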
// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
  return vcge_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
  return vcge_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
  return vcge_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
  return vcge_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
  return vcge_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
  return vcge_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
  return vcge_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
  return vcgeq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
  return vcgeq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
  return vcgeq_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
  return vcgeq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgeq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgeq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgeq_u32(a, b);
}

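// vcgt_*/vcgtq_*: greater than, using sgt, ugt, or ogt.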
// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
  return vcgt_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
  return vcgt_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
  return vcgt_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
  return vcgt_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
  return vcgt_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
  return vcgt_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
  return vcgt_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
  return vcgtq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
  return vcgtq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
  return vcgtq_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
  return vcgtq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgtq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgtq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgtq_u32(a, b);
}

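// vcle_*/vcleq_*: less than or equal, using sle, ule, or ole.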
// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
  return vcle_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
  return vcle_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
  return vcle_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
  return vcle_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
  return vcle_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
  return vcle_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
  return vcle_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
  return vcleq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
  return vcleq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
  return vcleq_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
  return vcleq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
  return vcleq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
  return vcleq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
  return vcleq_u32(a, b);
}

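// vcls_*/vclsq_*: count leading sign bits per lane, via @llvm.arm.neon.vcls.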
// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
  return vcls_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> [[VCLS_V_I]]) #4
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vcls_s16(int16x4_t a) {
  return vcls_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> [[VCLS_V_I]]) #4
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vcls_s32(int32x2_t a) {
  return vcls_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
  return vclsq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #4
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vclsq_s16(int16x8_t a) {
  return vclsq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #4
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vclsq_s32(int32x4_t a) {
  return vclsq_s32(a);
}

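// vclt_*/vcltq_*: less than, using slt, ult, or olt.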
// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
  return vclt_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
  return vclt_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
  return vclt_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
  return vclt_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
  return vclt_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
  return vclt_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
  return vclt_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
  return vcltq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
  return vcltq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
  return vcltq_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
  return vcltq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
  return vcltq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
  return vcltq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
  return vcltq_u32(a, b);
}

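// vclz_*/vclzq_*: count leading zeros per lane. These map onto the generic
// @llvm.ctlz intrinsic; the i1 false argument asks for a defined result when
// the input lane is zero.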
// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
  return vclz_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vclz_s16(int16x4_t a) {
  return vclz_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vclz_s32(int32x2_t a) {
  return vclz_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 {
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
  return vclz_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
  return vclz_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
  return vclz_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 {
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}


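// vcnt_*: population count of each byte. Lowered to @llvm.ctpop.
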
// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 {
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 {
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 {
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 {
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}


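// vcombine_*: concatenate two 64-bit vectors into one 128-bit vector,
// lowered to a single shufflevector with an identity-concatenation mask.
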
// CHECK-LABEL: define <16 x i8> @test_vcombine_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcombine_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcombine_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vcombine_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// CHECK-LABEL: define <8 x half> @test_vcombine_f16(<4 x half> %a, <4 x half> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vcombine_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcombine_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcombine_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcombine_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vcombine_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcombine_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcombine_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}


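// vcreate_*: reinterpret a uint64_t as a 64-bit vector (a plain bitcast).
// Each test feeds the result into a second intrinsic so the cast is consumed.
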
// CHECK-LABEL: define <8 x i8> @test_vcreate_s8(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// CHECK-LABEL: define <4 x i16> @test_vcreate_s16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: define <2 x i32> @test_vcreate_s32(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: define <4 x half> @test_vcreate_f16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vcreate_f32(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcreate_u8(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_u8(vcreate_u8(a));
}

// CHECK-LABEL: define <4 x i16> @test_vcreate_u16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_u16(vcreate_u16(a));
}

// CHECK-LABEL: define <2 x i32> @test_vcreate_u32(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_u32(vcreate_u32(a));
}


// There are two ways to lower the 64-bit materialization below: either one
// 'vmov d, r, r' or two 'vmov d[], r'. LLVM does the latter. We may want to
// be less strict about the matching pattern if it starts causing problems.
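// Purely illustrative (not matched by FileCheck; register assignment is
// assumed), the two candidate sequences would look roughly like:
//   vmov d0, r0, r1        @ one 'vmov d, r, r'
// versus:
//   vmov.32 d0[0], r0      @ two 'vmov d[], r'
//   vmov.32 d0[1], r1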
// CHECK-LABEL: define <1 x i64> @test_vcreate_u64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
  uint64x1_t tmp = vcreate_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: define <8 x i8> @test_vcreate_p8(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
  return vcnt_p8(vcreate_p8(a));
}

// CHECK-LABEL: define <4 x i16> @test_vcreate_p16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) #4
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP4]]
poly16x4_t test_vcreate_p16(uint64_t a) {
  poly16x4_t tmp = vcreate_p16(a);
  return vbsl_p16(tmp, tmp, tmp);
}

// CHECK-LABEL: define <1 x i64> @test_vcreate_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
  int64x1_t tmp = vcreate_s64(a);
  return vadd_s64(tmp, tmp);
}


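// vcvt_*: lane-wise conversions between floating-point, half-precision, and
// integer vectors.
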
// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #4
// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}


// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}


// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #4
// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP1]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}


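// vcvt_n_*: fixed-point conversions; the immediate operand is the number of
// fractional bits.
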
// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 1);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 1);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 3);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 3);
}


// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 3);
}


// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 3);
}


// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}


// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}


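// vdup_lane_*: broadcast one lane of the input across all lanes of the
// result, lowered to a shufflevector with a constant splat mask.
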
// CHECK-LABEL: define <8 x i8> @test_vdup_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: define <2 x float> @test_vdup_lane_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x float> [[SHUFFLE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[SHUFFLE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[SHUFFLE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[SHUFFLE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[SHUFFLE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[SHUFFLE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: define <4 x float> @test_vdupq_lane_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x float> [[SHUFFLE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}


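// vdup_n_*: broadcast a scalar to every lane, lowered to a chain of
// insertelement instructions.
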
// CHECK-LABEL: define <8 x i8> @test_vdup_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
  return vdup_n_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
  return vdup_n_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
  return vdup_n_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
  return vdup_n_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
  return vdup_n_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
  return vdup_n_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
  return vdup_n_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vdup_n_p16(poly16_t a) {
  return vdup_n_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vdup_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vdup_n_f16(float16_t *a) {
  return vdup_n_f16(*a);
}

// CHECK-LABEL: define <2 x float> @test_vdup_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vdup_n_f32(float32_t a) {
  return vdup_n_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
  return vdupq_n_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
  return vdupq_n_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
  return vdupq_n_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
  return vdupq_n_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
  return vdupq_n_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
  return vdupq_n_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
  return vdupq_n_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
  return vdupq_n_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vdupq_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vdupq_n_f16(float16_t *a) {
  return vdupq_n_f16(*a);
}

// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
  return vdupq_n_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
  int64x1_t tmp = vdup_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vdup_n_u64(uint64_t a) {
  uint64x1_t tmp = vdup_n_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
  int64x2_t tmp = vdupq_n_s64(a);
  return vaddq_s64(tmp, tmp);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
  uint64x2_t tmp = vdupq_n_u64(a);
  return vaddq_u64(tmp, tmp);
}


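// veor_*: bitwise exclusive OR; maps directly to the IR 'xor' instruction.
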
3074 // CHECK-LABEL: define <8 x i8> @test_veor_s8(<8 x i8> %a, <8 x i8> %b) #0 {
3075 // CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
3076 // CHECK: ret <8 x i8> [[XOR_I]]
test_veor_s8(int8x8_t a,int8x8_t b)3077 int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
3078 return veor_s8(a, b);
3079 }
3080
3081 // CHECK-LABEL: define <4 x i16> @test_veor_s16(<4 x i16> %a, <4 x i16> %b) #0 {
3082 // CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
3083 // CHECK: ret <4 x i16> [[XOR_I]]
test_veor_s16(int16x4_t a,int16x4_t b)3084 int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
3085 return veor_s16(a, b);
3086 }
3087
3088 // CHECK-LABEL: define <2 x i32> @test_veor_s32(<2 x i32> %a, <2 x i32> %b) #0 {
3089 // CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
3090 // CHECK: ret <2 x i32> [[XOR_I]]
test_veor_s32(int32x2_t a,int32x2_t b)3091 int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
3092 return veor_s32(a, b);
3093 }
3094
3095 // CHECK-LABEL: define <1 x i64> @test_veor_s64(<1 x i64> %a, <1 x i64> %b) #0 {
3096 // CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
3097 // CHECK: ret <1 x i64> [[XOR_I]]
test_veor_s64(int64x1_t a,int64x1_t b)3098 int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
3099 return veor_s64(a, b);
3100 }
3101
3102 // CHECK-LABEL: define <8 x i8> @test_veor_u8(<8 x i8> %a, <8 x i8> %b) #0 {
3103 // CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
3104 // CHECK: ret <8 x i8> [[XOR_I]]
test_veor_u8(uint8x8_t a,uint8x8_t b)3105 uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
3106 return veor_u8(a, b);
3107 }
3108
3109 // CHECK-LABEL: define <4 x i16> @test_veor_u16(<4 x i16> %a, <4 x i16> %b) #0 {
3110 // CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
3111 // CHECK: ret <4 x i16> [[XOR_I]]
test_veor_u16(uint16x4_t a,uint16x4_t b)3112 uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
3113 return veor_u16(a, b);
3114 }
3115
3116 // CHECK-LABEL: define <2 x i32> @test_veor_u32(<2 x i32> %a, <2 x i32> %b) #0 {
3117 // CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
3118 // CHECK: ret <2 x i32> [[XOR_I]]
test_veor_u32(uint32x2_t a,uint32x2_t b)3119 uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
3120 return veor_u32(a, b);
3121 }
3122
3123 // CHECK-LABEL: define <1 x i64> @test_veor_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3124 // CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
3125 // CHECK: ret <1 x i64> [[XOR_I]]
test_veor_u64(uint64x1_t a,uint64x1_t b)3126 uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
3127 return veor_u64(a, b);
3128 }
3129
3130 // CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
3131 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
3132 // CHECK: ret <16 x i8> [[XOR_I]]
test_veorq_s8(int8x16_t a,int8x16_t b)3133 int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
3134 return veorq_s8(a, b);
3135 }
3136
3137 // CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
3138 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
3139 // CHECK: ret <8 x i16> [[XOR_I]]
test_veorq_s16(int16x8_t a,int16x8_t b)3140 int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
3141 return veorq_s16(a, b);
3142 }
3143
3144 // CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
3145 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
3146 // CHECK: ret <4 x i32> [[XOR_I]]
test_veorq_s32(int32x4_t a,int32x4_t b)3147 int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
3148 return veorq_s32(a, b);
3149 }
3150
3151 // CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
3152 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
3153 // CHECK: ret <2 x i64> [[XOR_I]]
test_veorq_s64(int64x2_t a,int64x2_t b)3154 int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
3155 return veorq_s64(a, b);
3156 }
3157
3158 // CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
3159 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
3160 // CHECK: ret <16 x i8> [[XOR_I]]
test_veorq_u8(uint8x16_t a,uint8x16_t b)3161 uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
3162 return veorq_u8(a, b);
3163 }
3164
3165 // CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
3166 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
3167 // CHECK: ret <8 x i16> [[XOR_I]]
test_veorq_u16(uint16x8_t a,uint16x8_t b)3168 uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
3169 return veorq_u16(a, b);
3170 }
3171
3172 // CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
3173 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
3174 // CHECK: ret <4 x i32> [[XOR_I]]
test_veorq_u32(uint32x4_t a,uint32x4_t b)3175 uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
3176 return veorq_u32(a, b);
3177 }
3178
3179 // CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
3180 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
3181 // CHECK: ret <2 x i64> [[XOR_I]]
test_veorq_u64(uint64x2_t a,uint64x2_t b)3182 uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
3183 return veorq_u64(a, b);
3184 }
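
// A minimal usage sketch (hypothetical helper, no CHECK lines): veor is a
// plain lanewise XOR, which is why each test above lowers to a single xor
// instruction; XOR-ing a vector with itself therefore clears every lane.
static inline uint8x8_t zero_via_xor_u8(uint8x8_t a) {
  return veor_u8(a, a); // every lane becomes 0
}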


// CHECK-LABEL: define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
  return vext_s8(a, b, 7);
}

// CHECK-LABEL: define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
  return vext_u8(a, b, 7);
}

// CHECK-LABEL: define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
  return vext_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
  return vext_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
  return vext_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
  return vext_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
  return vext_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
  return vext_s64(a, b, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
  return vextq_s8(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
  return vextq_u8(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
  return vextq_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
  return vextq_s16(a, b, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
  return vextq_u16(a, b, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
  return vextq_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
  return vextq_s32(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
  return vextq_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
  return vextq_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
  return vextq_u64(a, b, 1);
}

// CHECK-LABEL: define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
  return vextq_f32(a, b, 3);
}
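
// A minimal usage sketch (hypothetical helper, no CHECK lines): vext(a, b, n)
// takes an N-lane window starting at lane n of the concatenation a:b, hence
// the single shufflevector above. With both operands the same vector it acts
// as a lane rotation.
static inline uint8x8_t rotate_lanes_left_u8(uint8x8_t v) {
  return vext_u8(v, v, 1); // lanes 1..7 of v, then lane 0
}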


// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfma_f32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmaq_f32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfms_f32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmsq_f32(a, b, c);
}
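
// A minimal usage sketch (hypothetical helper, no CHECK lines): vfms(a, b, c)
// computes a - b*c in one fused operation; the IR above expresses this as
// llvm.fma with a negated multiplicand, i.e. fma(-b, c, a), so it is exactly
// equivalent to combining vneg with vfma.
static inline float32x2_t fms_equivalent_f32(float32x2_t a, float32x2_t b,
                                             float32x2_t c) {
  return vfma_f32(a, vneg_f32(b), c); // same result as vfms_f32(a, b, c)
}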


// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
  return vget_high_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_high_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
  return vget_high_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_high_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
  return vget_high_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_high_s64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
  return vget_high_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vget_high_f16(<8 x half> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
  return vget_high_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vget_high_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
  return vget_high_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_high_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
  return vget_high_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_high_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_high_u16(uint16x8_t a) {
  return vget_high_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_high_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_high_u32(uint32x4_t a) {
  return vget_high_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_high_u64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_high_u64(uint64x2_t a) {
  return vget_high_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_high_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_high_p8(poly8x16_t a) {
  return vget_high_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_high_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_high_p16(poly16x8_t a) {
  return vget_high_p16(a);
}
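
// A minimal usage sketch (hypothetical helper, no CHECK lines): vget_high_*
// selects the upper half of a Q register as a D register, which is why each
// test lowers to a shufflevector over the upper-lane indices. A common use
// is reducing a 128-bit vector by folding its halves together.
static inline int16x4_t fold_halves_s16(int16x8_t v) {
  return vadd_s16(vget_low_s16(v), vget_high_s16(v)); // lanewise half-sum
}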


// CHECK-LABEL: define zeroext i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: define zeroext i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: define signext i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: define signext i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: define signext i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: define signext i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: define zeroext i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: define zeroext i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: define signext i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: define signext i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: define signext i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: define signext i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: ret float [[VGET_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// The optimizer is able to remove all moves now.
// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// The optimizer is able to remove all moves now.
// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}
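
// A minimal usage sketch (hypothetical helper, no CHECK lines): vget_lane_*
// with a constant lane index lowers to a single extractelement, so scalar
// reductions can be spelled directly in terms of lane reads.
static inline int32_t sum_lanes_s32(int32x2_t v) {
  return vget_lane_s32(v, 0) + vget_lane_s32(v, 1);
}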


// CHECK-LABEL: define <8 x i8> @test_vget_low_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_low_s8(int8x16_t a) {
  return vget_low_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_low_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_low_s16(int16x8_t a) {
  return vget_low_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_low_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_low_s32(int32x4_t a) {
  return vget_low_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_low_s64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_low_s64(int64x2_t a) {
  return vget_low_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vget_low_f16(<8 x half> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_low_f16(float16x8_t a) {
  return vget_low_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vget_low_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_low_f32(float32x4_t a) {
  return vget_low_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_low_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_low_u8(uint8x16_t a) {
  return vget_low_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_low_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_low_u16(uint16x8_t a) {
  return vget_low_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_low_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_low_u32(uint32x4_t a) {
  return vget_low_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_low_u64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_low_u64(uint64x2_t a) {
  return vget_low_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_low_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_low_p8(poly8x16_t a) {
  return vget_low_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_low_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_low_p16(poly16x8_t a) {
  return vget_low_p16(a);
}
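
// A minimal usage sketch (hypothetical helper, no CHECK lines): vget_low_*
// is the counterpart of vget_high_*, and vcombine_* reassembles the two
// D halves, so splitting and recombining round-trips the original vector.
static inline uint8x16_t split_roundtrip_u8(uint8x16_t v) {
  return vcombine_u8(vget_low_u8(v), vget_high_u8(v)); // == v, lane for lane
}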


// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
  return vhadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
  return vhadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
  return vhadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vhadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vhadd_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
  return vhaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
  return vhaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
  return vhaddq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vhaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vhaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vhaddq_u32(a, b);
}
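
// A minimal usage sketch (hypothetical helper, no CHECK lines): vhadd_*
// computes the lanewise halving add (a + b) >> 1 with the intermediate sum
// kept one bit wider, so it cannot overflow; vrhadd_* is the rounding
// variant.
static inline uint8x8_t average_floor_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b); // lanewise floor((a + b) / 2)
}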


// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
  return vhsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
  return vhsub_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
  return vhsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
  return vhsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
  return vhsub_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
  return vhsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
  return vhsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
  return vhsubq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vhsubq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vhsubq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vhsubq_u32(a, b);
}
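
// A minimal usage sketch (hypothetical helper, no CHECK lines): vhsub_*
// computes the lanewise halving subtract (a - b) >> 1, with an arithmetic
// shift for the signed variants, again without intermediate overflow.
static inline int16x4_t half_difference_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b); // lanewise (a - b) >> 1
}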


// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK: ret <16 x i8> [[VLD1]]
uint8x16_t test_vld1q_u8(uint8_t const * a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <8 x i16> [[VLD1]]
uint16x8_t test_vld1q_u16(uint16_t const * a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <4 x i32> [[VLD1]]
uint32x4_t test_vld1q_u32(uint32_t const * a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x i64> [[VLD1]]
uint64x2_t test_vld1q_u64(uint64_t const * a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK: ret <16 x i8> [[VLD1]]
int8x16_t test_vld1q_s8(int8_t const * a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <8 x i16> [[VLD1]]
int16x8_t test_vld1q_s16(int16_t const * a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <4 x i32> [[VLD1]]
int32x4_t test_vld1q_s32(int32_t const * a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x i64> [[VLD1]]
int64x2_t test_vld1q_s64(int64_t const * a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VLD1]] to <8 x half>
// CHECK: ret <8 x half> [[TMP1]]
float16x8_t test_vld1q_f16(float16_t const * a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <4 x float> [[VLD1]]
float32x4_t test_vld1q_f32(float32_t const * a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK: ret <16 x i8> [[VLD1]]
poly8x16_t test_vld1q_p8(poly8_t const * a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <8 x i16> [[VLD1]]
poly16x8_t test_vld1q_p16(poly16_t const * a) {
  return vld1q_p16(a);
}
4215
4216 // CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
4217 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
4218 // CHECK: ret <8 x i8> [[VLD1]]
test_vld1_u8(uint8_t const * a)4219 uint8x8_t test_vld1_u8(uint8_t const * a) {
4220 return vld1_u8(a);
4221 }
4222
4223 // CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
4224 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4225 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4226 // CHECK: ret <4 x i16> [[VLD1]]
test_vld1_u16(uint16_t const * a)4227 uint16x4_t test_vld1_u16(uint16_t const * a) {
4228 return vld1_u16(a);
4229 }
4230
4231 // CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
4232 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4233 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
4234 // CHECK: ret <2 x i32> [[VLD1]]
test_vld1_u32(uint32_t const * a)4235 uint32x2_t test_vld1_u32(uint32_t const * a) {
4236 return vld1_u32(a);
4237 }
4238
4239 // CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
4240 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4241 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
4242 // CHECK: ret <1 x i64> [[VLD1]]
test_vld1_u64(uint64_t const * a)4243 uint64x1_t test_vld1_u64(uint64_t const * a) {
4244 return vld1_u64(a);
4245 }
4246
4247 // CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
4248 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
4249 // CHECK: ret <8 x i8> [[VLD1]]
test_vld1_s8(int8_t const * a)4250 int8x8_t test_vld1_s8(int8_t const * a) {
4251 return vld1_s8(a);
4252 }
4253
4254 // CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
4255 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4256 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4257 // CHECK: ret <4 x i16> [[VLD1]]
test_vld1_s16(int16_t const * a)4258 int16x4_t test_vld1_s16(int16_t const * a) {
4259 return vld1_s16(a);
4260 }
4261
4262 // CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
4263 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4264 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
4265 // CHECK: ret <2 x i32> [[VLD1]]
test_vld1_s32(int32_t const * a)4266 int32x2_t test_vld1_s32(int32_t const * a) {
4267 return vld1_s32(a);
4268 }
4269
4270 // CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
4271 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4272 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
4273 // CHECK: ret <1 x i64> [[VLD1]]
test_vld1_s64(int64_t const * a)4274 int64x1_t test_vld1_s64(int64_t const * a) {
4275 return vld1_s64(a);
4276 }
4277
4278 // CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
4279 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
4280 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4281 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VLD1]] to <4 x half>
4282 // CHECK: ret <4 x half> [[TMP1]]
test_vld1_f16(float16_t const * a)4283 float16x4_t test_vld1_f16(float16_t const * a) {
4284 return vld1_f16(a);
4285 }
4286
4287 // CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
4288 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4289 // CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4)
4290 // CHECK: ret <2 x float> [[VLD1]]
test_vld1_f32(float32_t const * a)4291 float32x2_t test_vld1_f32(float32_t const * a) {
4292 return vld1_f32(a);
4293 }
4294
4295 // CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
4296 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
4297 // CHECK: ret <8 x i8> [[VLD1]]
test_vld1_p8(poly8_t const * a)4298 poly8x8_t test_vld1_p8(poly8_t const * a) {
4299 return vld1_p8(a);
4300 }
4301
4302 // CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
4303 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4304 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4305 // CHECK: ret <4 x i16> [[VLD1]]
test_vld1_p16(poly16_t const * a)4306 poly16x4_t test_vld1_p16(poly16_t const * a) {
4307 return vld1_p16(a);
4308 }
4309
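// Illustrative usage sketch, not part of the FileCheck-verified output: a
// hypothetical 'static' (unused) helper, so no extra IR is emitted. vld1
// performs a plain contiguous load; vld1q_u8 fills one q register with 16
// consecutive bytes, assuming only the element type's natural alignment.
static uint8x16_t hypothetical_load_block(const uint8_t buf[16]) {
  return vld1q_u8(buf);
}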

// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
  return vld1q_dup_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
  return vld1q_dup_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
  return vld1q_dup_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
  return vld1q_dup_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
  return vld1q_dup_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
  return vld1q_dup_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
  return vld1q_dup_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
  return vld1q_dup_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
// CHECK: ret <8 x half> [[TMP4]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
  return vld1q_dup_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
  return vld1q_dup_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
  return vld1q_dup_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
  return vld1q_dup_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
  return vld1_dup_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
  return vld1_dup_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
  return vld1_dup_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
  return vld1_dup_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
  return vld1_dup_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
  return vld1_dup_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
  return vld1_dup_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
  return vld1_dup_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
// CHECK: ret <4 x half> [[TMP4]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
  return vld1_dup_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
  return vld1_dup_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
  return vld1_dup_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
  return vld1_dup_p16(a);
}

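// Illustrative usage sketch, not part of the FileCheck-verified output: a
// hypothetical 'static' (unused) helper, so no extra IR is emitted.
// vld1(q)_dup loads a single element and broadcasts it to every lane,
// matching the load + insertelement + shufflevector sequence checked above;
// it behaves like vdupq_n_u32(*p) with the load folded in.
static uint32x4_t hypothetical_splat_load(const uint32_t *p) {
  // All four lanes receive *p.
  return vld1q_dup_u32(p);
}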

// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
  return vld1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
  return vld1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
  return vld1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
  return vld1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
  return vld1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
  return vld1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
  return vld1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
  return vld1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half>
// CHECK: ret <8 x half> [[TMP5]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
  return vld1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
// CHECK: ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
// CHECK: ret <4 x half> [[TMP5]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK: ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}

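// Illustrative usage sketch, not part of the FileCheck-verified output: a
// hypothetical 'static' (unused) helper, so no extra IR is emitted.
// vld1(q)_lane loads a single element into one lane of an existing vector
// and leaves the other lanes untouched; the lane index must be a
// compile-time constant (0..7 for uint16x8_t).
static uint16x8_t hypothetical_refresh_lane(const uint16_t *p, uint16x8_t v) {
  // Lane 7 becomes *p; lanes 0-6 keep their previous values.
  return vld1q_lane_u16(p, v, 7);
}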
4832
4833 // CHECK-LABEL: define void @test_vld2q_u8(%struct.uint8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
4834 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
4835 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
4836 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
4837 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
4838 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
4839 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
4840 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
4841 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
4842 // CHECK: ret void
test_vld2q_u8(uint8_t const * a)4843 uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
4844 return vld2q_u8(a);
4845 }
4846
4847 // CHECK-LABEL: define void @test_vld2q_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
4848 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
4849 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
4850 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4851 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
4852 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
4853 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
4854 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
4855 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
4856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4857 // CHECK: ret void
test_vld2q_u16(uint16_t const * a)4858 uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
4859 return vld2q_u16(a);
4860 }
4861
4862 // CHECK-LABEL: define void @test_vld2q_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a) #0 {
4863 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
4864 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
4865 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4866 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
4867 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
4868 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]]
4869 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
4870 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
4871 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4872 // CHECK: ret void
test_vld2q_u32(uint32_t const * a)4873 uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
4874 return vld2q_u32(a);
4875 }
4876
4877 // CHECK-LABEL: define void @test_vld2q_s8(%struct.int8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
4878 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
4879 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
4880 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
4881 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
4882 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
4883 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
4884 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
4885 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
4886 // CHECK: ret void
test_vld2q_s8(int8_t const * a)4887 int8x16x2_t test_vld2q_s8(int8_t const * a) {
4888 return vld2q_s8(a);
4889 }
4890
4891 // CHECK-LABEL: define void @test_vld2q_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
4892 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
4893 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
4894 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4895 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
4896 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
4897 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
4898 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
4899 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
4900 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4901 // CHECK: ret void
test_vld2q_s16(int16_t const * a)4902 int16x8x2_t test_vld2q_s16(int16_t const * a) {
4903 return vld2q_s16(a);
4904 }
4905
4906 // CHECK-LABEL: define void @test_vld2q_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a) #0 {
4907 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
4908 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
4909 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4910 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
4911 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
4912 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]]
4913 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
4914 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
4915 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4916 // CHECK: ret void
test_vld2q_s32(int32_t const * a)4917 int32x4x2_t test_vld2q_s32(int32_t const * a) {
4918 return vld2q_s32(a);
4919 }
4920
4921 // CHECK-LABEL: define void @test_vld2q_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a) #0 {
4922 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
4923 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
4924 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
4925 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
4926 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
4927 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
4928 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8*
4929 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
4930 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4931 // CHECK: ret void
test_vld2q_f16(float16_t const * a)4932 float16x8x2_t test_vld2q_f16(float16_t const * a) {
4933 return vld2q_f16(a);
4934 }
4935
4936 // CHECK-LABEL: define void @test_vld2q_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a) #0 {
4937 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
4938 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
4939 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
4940 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* [[TMP1]], i32 4)
4941 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
4942 // CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_V]], { <4 x float>, <4 x float> }* [[TMP2]]
4943 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
4944 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
4945 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4946 // CHECK: ret void
test_vld2q_f32(float32_t const * a)4947 float32x4x2_t test_vld2q_f32(float32_t const * a) {
4948 return vld2q_f32(a);
4949 }
4950
4951 // CHECK-LABEL: define void @test_vld2q_p8(%struct.poly8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
4952 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
4953 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
4954 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
4955 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
4956 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
4957 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
4958 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
4959 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
4960 // CHECK: ret void
test_vld2q_p8(poly8_t const * a)4961 poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
4962 return vld2q_p8(a);
4963 }
4964
4965 // CHECK-LABEL: define void @test_vld2q_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
4966 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
4967 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
4968 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4969 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
4970 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
4971 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
4972 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
4973 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
4974 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
4975 // CHECK: ret void
test_vld2q_p16(poly16_t const * a)4976 poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
4977 return vld2q_p16(a);
4978 }
4979
4980 // CHECK-LABEL: define void @test_vld2_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
4981 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
4982 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
4983 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
4984 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
4985 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
4986 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
4987 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
4988 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
4989 // CHECK: ret void
test_vld2_u8(uint8_t const * a)4990 uint8x8x2_t test_vld2_u8(uint8_t const * a) {
4991 return vld2_u8(a);
4992 }
4993
4994 // CHECK-LABEL: define void @test_vld2_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
4995 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
4996 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
4997 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4998 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
4999 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5000 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
5001 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
5002 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
5003 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5004 // CHECK: ret void
test_vld2_u16(uint16_t const * a)5005 uint16x4x2_t test_vld2_u16(uint16_t const * a) {
5006 return vld2_u16(a);
5007 }
5008
5009 // CHECK-LABEL: define void @test_vld2_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
5010 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
5011 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5012 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5013 // CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
5014 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
5015 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]]
5016 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
5017 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5018 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5019 // CHECK: ret void
test_vld2_u32(uint32_t const * a)5020 uint32x2x2_t test_vld2_u32(uint32_t const * a) {
5021 return vld2_u32(a);
5022 }
5023
5024 // CHECK-LABEL: define void @test_vld2_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
5025 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
5026 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
5027 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5028 // CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
5029 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
5030 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]]
5031 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8*
5032 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
5033 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5034 // CHECK: ret void
test_vld2_u64(uint64_t const * a)5035 uint64x1x2_t test_vld2_u64(uint64_t const * a) {
5036 return vld2_u64(a);
5037 }
5038
5039 // CHECK-LABEL: define void @test_vld2_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
5040 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
5041 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5042 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
5043 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
5044 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
5045 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
5046 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5047 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
5048 // CHECK: ret void
test_vld2_s8(int8_t const * a)5049 int8x8x2_t test_vld2_s8(int8_t const * a) {
5050 return vld2_s8(a);
5051 }
5052
5053 // CHECK-LABEL: define void @test_vld2_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
5054 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
5055 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5056 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5057 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
5058 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5059 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
5060 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
5061 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5062 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5063 // CHECK: ret void
test_vld2_s16(int16_t const * a)5064 int16x4x2_t test_vld2_s16(int16_t const * a) {
5065 return vld2_s16(a);
5066 }
5067
5068 // CHECK-LABEL: define void @test_vld2_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
5069 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
5070 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5071 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5072 // CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
5073 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
5074 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]]
5075 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
5076 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5077 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5078 // CHECK: ret void
test_vld2_s32(int32_t const * a)5079 int32x2x2_t test_vld2_s32(int32_t const * a) {
5080 return vld2_s32(a);
5081 }
5082
5083 // CHECK-LABEL: define void @test_vld2_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
5084 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
5085 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
5086 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5087 // CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
5088 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
5089 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]]
5090 // CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8*
5091 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
5092 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5093 // CHECK: ret void
test_vld2_s64(int64_t const * a)5094 int64x1x2_t test_vld2_s64(int64_t const * a) {
5095 return vld2_s64(a);
5096 }
5097
5098 // CHECK-LABEL: define void @test_vld2_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 {
5099 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
5100 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5101 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5102 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
5103 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5104 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
5105 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
5106 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5107 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5108 // CHECK: ret void
test_vld2_f16(float16_t const * a)5109 float16x4x2_t test_vld2_f16(float16_t const * a) {
5110 return vld2_f16(a);
5111 }
5112
5113 // CHECK-LABEL: define void @test_vld2_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 {
5114 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
5115 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5116 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5117 // CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32.p0i8(i8* [[TMP1]], i32 4)
5118 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
5119 // CHECK: store { <2 x float>, <2 x float> } [[VLD2_V]], { <2 x float>, <2 x float> }* [[TMP2]]
5120 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
5121 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5122 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5123 // CHECK: ret void
test_vld2_f32(float32_t const * a)5124 float32x2x2_t test_vld2_f32(float32_t const * a) {
5125 return vld2_f32(a);
5126 }
5127
5128 // CHECK-LABEL: define void @test_vld2_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
5129 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
5130 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5131 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
5132 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
5133 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
5134 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
5135 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5136 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
5137 // CHECK: ret void
5138 poly8x8x2_t test_vld2_p8(poly8_t const * a) {
5139 return vld2_p8(a);
5140 }
5141
5142 // CHECK-LABEL: define void @test_vld2_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
5143 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
5144 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5145 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5146 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
5147 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5148 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
5149 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
5150 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5151 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5152 // CHECK: ret void
5153 poly16x4x2_t test_vld2_p16(poly16_t const * a) {
5154 return vld2_p16(a);
5155 }
5156
5157
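// The vld2_dup tests below check the duplicating-load lowering: clang emits
// an @llvm.arm.neon.vld2lane call at lane 0 followed by a zeroinitializer
// shufflevector that splats each loaded element across its vector. The
// 64-bit variants (vld2_dup_u64/s64) are the exception: a <1 x i64> vector
// has a single lane, so a plain @llvm.arm.neon.vld2 already is the dup.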
5158 // CHECK-LABEL: define void @test_vld2_dup_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
5159 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
5160 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
5161 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
5162 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
5163 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
5164 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
5165 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
5166 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
5167 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
5168 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
5169 // CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
5170 // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
5171 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
5172 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
5173 // CHECK: ret void
5174 uint8x8x2_t test_vld2_dup_u8(uint8_t const * a) {
5175 return vld2_dup_u8(a);
5176 }
5177
5178 // CHECK-LABEL: define void @test_vld2_dup_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
5179 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
5180 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
5181 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5182 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
5183 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
5184 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
5185 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
5186 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
5187 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
5188 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
5189 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5190 // CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
5191 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
5192 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
5193 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5194 // CHECK: ret void
5195 uint16x4x2_t test_vld2_dup_u16(uint16_t const * a) {
5196 return vld2_dup_u16(a);
5197 }
5198
5199 // CHECK-LABEL: define void @test_vld2_dup_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
5200 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
5201 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5202 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5203 // CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
5204 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
5205 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
5206 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
5207 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1
5208 // CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
5209 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
5210 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
5211 // CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]]
5212 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
5213 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5214 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5215 // CHECK: ret void
5216 uint32x2x2_t test_vld2_dup_u32(uint32_t const * a) {
5217 return vld2_dup_u32(a);
5218 }
5219
5220 // CHECK-LABEL: define void @test_vld2_dup_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
5221 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
5222 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
5223 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5224 // CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
5225 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
5226 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]]
5227 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8*
5228 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
5229 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5230 // CHECK: ret void
5231 uint64x1x2_t test_vld2_dup_u64(uint64_t const * a) {
5232 return vld2_dup_u64(a);
5233 }
5234
5235 // CHECK-LABEL: define void @test_vld2_dup_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
5236 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
5237 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5238 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
5239 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
5240 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
5241 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
5242 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
5243 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
5244 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
5245 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
5246 // CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
5247 // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
5248 // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5249 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
5250 // CHECK: ret void
5251 int8x8x2_t test_vld2_dup_s8(int8_t const * a) {
5252 return vld2_dup_s8(a);
5253 }
5254
5255 // CHECK-LABEL: define void @test_vld2_dup_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
5256 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
5257 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5258 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5259 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
5260 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
5261 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
5262 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
5263 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
5264 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
5265 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
5266 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5267 // CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
5268 // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
5269 // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5270 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5271 // CHECK: ret void
5272 int16x4x2_t test_vld2_dup_s16(int16_t const * a) {
5273 return vld2_dup_s16(a);
5274 }
5275
5276 // CHECK-LABEL: define void @test_vld2_dup_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
5277 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
5278 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5279 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5280 // CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
5281 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
5282 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
5283 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
5284 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1
5285 // CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
5286 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
5287 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
5288 // CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]]
5289 // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
5290 // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5291 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5292 // CHECK: ret void
5293 int32x2x2_t test_vld2_dup_s32(int32_t const * a) {
5294 return vld2_dup_s32(a);
5295 }
5296
5297 // CHECK-LABEL: define void @test_vld2_dup_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
5298 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
5299 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
5300 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5301 // CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
5302 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
5303 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]]
5304 // CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8*
5305 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
5306 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
5307 // CHECK: ret void
5308 int64x1x2_t test_vld2_dup_s64(int64_t const * a) {
5309 return vld2_dup_s64(a);
5310 }
5311
5312 // CHECK-LABEL: define void @test_vld2_dup_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 {
5313 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
5314 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5315 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5316 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
5317 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
5318 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
5319 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
5320 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
5321 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
5322 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
5323 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5324 // CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
5325 // CHECK: [[TMP7:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
5326 // CHECK: [[TMP8:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5327 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5328 // CHECK: ret void
5329 float16x4x2_t test_vld2_dup_f16(float16_t const * a) {
5330 return vld2_dup_f16(a);
5331 }
5332
5333 // CHECK-LABEL: define void @test_vld2_dup_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 {
5334 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
5335 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5336 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5337 // CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, i32 0, i32 4)
5338 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD_DUP]], 0
5339 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
5340 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
5341 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1
5342 // CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
5343 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
5344 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
5345 // CHECK: store { <2 x float>, <2 x float> } [[TMP5]], { <2 x float>, <2 x float> }* [[TMP6]]
5346 // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
5347 // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5348 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5349 // CHECK: ret void
5350 float32x2x2_t test_vld2_dup_f32(float32_t const * a) {
5351 return vld2_dup_f32(a);
5352 }
5353
5354 // CHECK-LABEL: define void @test_vld2_dup_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
5355 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
5356 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5357 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
5358 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
5359 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
5360 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
5361 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
5362 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
5363 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
5364 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
5365 // CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
5366 // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
5367 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5368 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
5369 // CHECK: ret void
5370 poly8x8x2_t test_vld2_dup_p8(poly8_t const * a) {
5371 return vld2_dup_p8(a);
5372 }
5373
5374 // CHECK-LABEL: define void @test_vld2_dup_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
5375 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
5376 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5377 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5378 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
5379 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
5380 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
5381 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
5382 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
5383 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
5384 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
5385 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
5386 // CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
5387 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
5388 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5389 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5390 // CHECK: ret void
5391 poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) {
5392 return vld2_dup_p16(a);
5393 }
5394
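// Illustrative usage sketch (not CHECK-verified; the helper name is
// hypothetical): vld2_dup_u8 reads the byte pair p[0]/p[1] and broadcasts
// each byte across all eight lanes of its result vector.
static inline uint8x8x2_t broadcast_pair_u8(const uint8_t *p) {
  uint8x8x2_t v = vld2_dup_u8(p); // v.val[0] == splat(p[0]), v.val[1] == splat(p[1])
  return v;
}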
5395
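// The vld2q_lane tests below cover the lane-inserting loads on q registers.
// The 32-byte struct argument is passed coerced as [4 x i64], stored back
// into the [[B]] alloca, copied to [[__S1]], and its two vectors are
// reloaded before the @llvm.arm.neon.vld2lane call; the lane index (the
// second-to-last i32 argument) must be a compile-time constant within the
// vector's lane range.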
5396 // CHECK-LABEL: define void @test_vld2q_lane_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
5397 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
5398 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
5399 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
5400 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
5401 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
5402 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5403 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
5404 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
5405 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5406 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
5407 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5408 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
5409 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
5410 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5411 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5412 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
5413 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5414 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5415 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5416 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5417 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5418 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2)
5419 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }*
5420 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]]
5421 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
5422 // CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
5423 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5424 // CHECK: ret void
5425 uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
5426 return vld2q_lane_u16(a, b, 7);
5427 }
5428
5429 // CHECK-LABEL: define void @test_vld2q_lane_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
5430 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
5431 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
5432 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
5433 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
5434 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
5435 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5436 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
5437 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
5438 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5439 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
5440 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5441 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
5442 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
5443 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5444 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5445 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
5446 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5447 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5448 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5449 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5450 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5451 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4)
5452 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }*
5453 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]]
5454 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
5455 // CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
5456 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5457 // CHECK: ret void
5458 uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
5459 return vld2q_lane_u32(a, b, 3);
5460 }
5461
5462 // CHECK-LABEL: define void @test_vld2q_lane_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
5463 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
5464 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
5465 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
5466 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
5467 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
5468 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5469 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
5470 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
5471 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5472 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
5473 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5474 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
5475 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
5476 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5477 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5478 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
5479 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5480 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5481 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5482 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5483 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5484 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2)
5485 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }*
5486 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]]
5487 // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
5488 // CHECK: [[TMP13:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
5489 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5490 // CHECK: ret void
5491 int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
5492 return vld2q_lane_s16(a, b, 7);
5493 }
5494
5495 // CHECK-LABEL: define void @test_vld2q_lane_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
5496 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
5497 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
5498 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
5499 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
5500 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
5501 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5502 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
5503 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
5504 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5505 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
5506 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5507 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
5508 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
5509 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5510 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5511 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
5512 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5513 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5514 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5515 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5516 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5517 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4)
5518 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }*
5519 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]]
5520 // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
5521 // CHECK: [[TMP13:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
5522 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5523 // CHECK: ret void
5524 int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
5525 return vld2q_lane_s32(a, b, 3);
5526 }
5527
5528 // CHECK-LABEL: define void @test_vld2q_lane_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 {
5529 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
5530 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
5531 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
5532 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
5533 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
5534 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5535 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
5536 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
5537 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5538 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
5539 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5540 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
5541 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
5542 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
5543 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
5544 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
5545 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
5546 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
5547 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
5548 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5549 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5550 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2)
5551 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }*
5552 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]]
5553 // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8*
5554 // CHECK: [[TMP13:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
5555 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5556 // CHECK: ret void
5557 float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
5558 return vld2q_lane_f16(a, b, 7);
5559 }
5560
5561 // CHECK-LABEL: define void @test_vld2q_lane_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 {
5562 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
5563 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
5564 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
5565 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
5566 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
5567 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5568 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
5569 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
5570 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5571 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
5572 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5573 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
5574 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
5575 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
5576 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
5577 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
5578 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
5579 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
5580 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
5581 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
5582 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
5583 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], i32 3, i32 4)
5584 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float> }*
5585 // CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_LANE_V]], { <4 x float>, <4 x float> }* [[TMP11]]
5586 // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
5587 // CHECK: [[TMP13:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
5588 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5589 // CHECK: ret void
5590 float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
5591 return vld2q_lane_f32(a, b, 3);
5592 }
5593
5594 // CHECK-LABEL: define void @test_vld2q_lane_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
5595 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
5596 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
5597 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
5598 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
5599 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
5600 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
5601 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
5602 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
5603 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
5604 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
5605 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5606 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
5607 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
5608 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5609 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5610 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
5611 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5612 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5613 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5614 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5615 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5616 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2)
5617 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }*
5618 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]]
5619 // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
5620 // CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
5621 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
5622 // CHECK: ret void
5623 poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
5624 return vld2q_lane_p16(a, b, 7);
5625 }
5626
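// The remaining vld2_lane tests exercise the same lane-inserting load on
// 64-bit d registers: the 16-byte struct argument arrives coerced as
// [2 x i64] with 8-byte alignment, and the 8-bit variants skip the
// bitcast-through-<8 x i8> round trip since their payload vectors are
// already byte vectors.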
5627 // CHECK-LABEL: define void @test_vld2_lane_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 {
5628 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
5629 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
5630 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
5631 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
5632 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5633 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5634 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
5635 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
5636 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5637 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
5638 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
5639 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5640 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5641 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
5642 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5643 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5644 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
5645 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }*
5646 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]]
5647 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
5648 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
5649 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5650 // CHECK: ret void
5651 uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
5652 return vld2_lane_u8(a, b, 7);
5653 }
5654
5655 // CHECK-LABEL: define void @test_vld2_lane_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 {
5656 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
5657 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
5658 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
5659 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
5660 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5661 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5662 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
5663 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
5664 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5665 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
5666 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5667 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
5668 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5669 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5670 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5671 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
5672 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5673 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5674 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5675 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5676 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5677 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
5678 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
5679 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
5680 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
5681 // CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
5682 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5683 // CHECK: ret void
5684 uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
5685 return vld2_lane_u16(a, b, 3);
5686 }
5687
5688 // CHECK-LABEL: define void @test_vld2_lane_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 {
5689 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
5690 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
5691 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
5692 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
5693 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
5694 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5695 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
5696 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
5697 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5698 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5699 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5700 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
5701 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
5702 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5703 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5704 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
5705 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5706 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5707 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5708 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5709 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5710 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4)
5711 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }*
5712 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]]
5713 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
5714 // CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5715 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5716 // CHECK: ret void
5717 uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
5718 return vld2_lane_u32(a, b, 1);
5719 }
5720
5721 // CHECK-LABEL: define void @test_vld2_lane_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 {
5722 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
5723 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
5724 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
5725 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
5726 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5727 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5728 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
5729 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
5730 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5731 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5732 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
5733 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5734 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5735 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
5736 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5737 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5738 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
5739 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }*
5740 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]]
5741 // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
5742 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5743 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5744 // CHECK: ret void
5745 int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
5746 return vld2_lane_s8(a, b, 7);
5747 }
5748
5749 // CHECK-LABEL: define void @test_vld2_lane_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 {
5750 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
5751 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
5752 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
5753 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
5754 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5755 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5756 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
5757 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
5758 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5759 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5760 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5761 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
5762 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5763 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5764 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5765 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
5766 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5767 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5768 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5769 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5770 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5771 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
5772 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
5773 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
5774 // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
5775 // CHECK: [[TMP13:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5776 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5777 // CHECK: ret void
5778 int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
5779 return vld2_lane_s16(a, b, 3);
5780 }
5781
5782 // CHECK-LABEL: define void @test_vld2_lane_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 {
5783 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
5784 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
5785 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
5786 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
5787 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
5788 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5789 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
5790 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
5791 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5792 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5793 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5794 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
5795 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
5796 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5797 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5798 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
5799 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5800 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5801 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5802 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5803 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5804 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4)
5805 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }*
5806 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]]
5807 // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
5808 // CHECK: [[TMP13:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5809 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5810 // CHECK: ret void
5811 int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
5812 return vld2_lane_s32(a, b, 1);
5813 }
5814
5815 // CHECK-LABEL: define void @test_vld2_lane_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a, [2 x i64] %b.coerce) #0 {
5816 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
5817 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
5818 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
5819 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
5820 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
5821 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5822 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
5823 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
5824 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5825 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5826 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5827 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
5828 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
5829 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
5830 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
5831 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
5832 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
5833 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
5834 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
5835 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5836 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5837 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
5838 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
5839 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
5840 // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
5841 // CHECK: [[TMP13:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5842 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5843 // CHECK: ret void
5844 float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
5845 return vld2_lane_f16(a, b, 3);
5846 }
5847
5848 // CHECK-LABEL: define void @test_vld2_lane_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a, [2 x i64] %b.coerce) #0 {
5849 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
5850 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
5851 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
5852 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
5853 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
5854 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5855 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
5856 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
5857 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5858 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5859 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5860 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
5861 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
5862 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
5863 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
5864 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
5865 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
5866 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
5867 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
5868 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
5869 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
5870 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], i32 1, i32 4)
5871 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float> }*
5872 // CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE_V]], { <2 x float>, <2 x float> }* [[TMP11]]
5873 // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
5874 // CHECK: [[TMP13:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5875 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5876 // CHECK: ret void
5877 float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
5878 return vld2_lane_f32(a, b, 1);
5879 }
5880
5881 // CHECK-LABEL: define void @test_vld2_lane_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 {
5882 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
5883 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
5884 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
5885 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
5886 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5887 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5888 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
5889 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
5890 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5891 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5892 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
5893 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5894 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5895 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
5896 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5897 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5898 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
5899 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }*
5900 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]]
5901 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
5902 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5903 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
5904 // CHECK: ret void
5905 poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
5906 return vld2_lane_p8(a, b, 7);
5907 }
5908
5909 // CHECK-LABEL: define void @test_vld2_lane_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 {
5910 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
5911 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
5912 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
5913 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
5914 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5915 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5916 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
5917 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
5918 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
5919 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5920 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5921 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
5922 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5923 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5924 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5925 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
5926 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5927 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5928 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5929 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5930 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5931 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
5932 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
5933 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
5934 // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
5935 // CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5936 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
5937 // CHECK: ret void
5938 poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
5939 return vld2_lane_p16(a, b, 3);
5940 }
5941
5942
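// vld3q_<type>: each intrinsic lowers to one @llvm.arm.neon.vld3 call on the
// 128-bit element type; the three result vectors are stored into a local
// aggregate and copied out to the sret slot (48 bytes, 16-byte alignment).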
5943 // CHECK-LABEL: define void @test_vld3q_u8(%struct.uint8x16x3_t* noalias sret %agg.result, i8* %a) #0 {
5944 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
5945 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
5946 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
5947 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
5948 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
5949 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* %agg.result to i8*
5950 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
5951 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false)
5952 // CHECK: ret void
5953 uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
5954 return vld3q_u8(a);
5955 }
5956
5957 // CHECK-LABEL: define void @test_vld3q_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a) #0 {
5958 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
5959 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
5960 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5961 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
5962 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
5963 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
5964 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8*
5965 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
5966 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
5967 // CHECK: ret void
5968 uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
5969 return vld3q_u16(a);
5970 }
5971
5972 // CHECK-LABEL: define void @test_vld3q_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a) #0 {
5973 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
5974 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
5975 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5976 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
5977 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
5978 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
5979 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8*
5980 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
5981 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
5982 // CHECK: ret void
5983 uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
5984 return vld3q_u32(a);
5985 }
5986
5987 // CHECK-LABEL: define void @test_vld3q_s8(%struct.int8x16x3_t* noalias sret %agg.result, i8* %a) #0 {
5988 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
5989 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
5990 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
5991 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
5992 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
5993 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* %agg.result to i8*
5994 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
5995 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false)
5996 // CHECK: ret void
5997 int8x16x3_t test_vld3q_s8(int8_t const * a) {
5998 return vld3q_s8(a);
5999 }
6000
6001 // CHECK-LABEL: define void @test_vld3q_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a) #0 {
6002 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
6003 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
6004 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6005 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
6006 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6007 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
6008 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8*
6009 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
6010 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
6011 // CHECK: ret void
6012 int16x8x3_t test_vld3q_s16(int16_t const * a) {
6013 return vld3q_s16(a);
6014 }
6015
6016 // CHECK-LABEL: define void @test_vld3q_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a) #0 {
6017 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
6018 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
6019 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6020 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
6021 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
6022 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
6023 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8*
6024 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
6025 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
6026 // CHECK: ret void
6027 int32x4x3_t test_vld3q_s32(int32_t const * a) {
6028 return vld3q_s32(a);
6029 }
6030
6031 // CHECK-LABEL: define void @test_vld3q_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a) #0 {
6032 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
6033 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
6034 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
6035 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
6036 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6037 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
6038 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8*
6039 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
6040 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
6041 // CHECK: ret void
6042 float16x8x3_t test_vld3q_f16(float16_t const * a) {
6043 return vld3q_f16(a);
6044 }
6045
6046 // CHECK-LABEL: define void @test_vld3q_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a) #0 {
6047 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
6048 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
6049 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
6050 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32.p0i8(i8* [[TMP1]], i32 4)
6051 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
6052 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP2]]
6053 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8*
6054 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
6055 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
6056 // CHECK: ret void
6057 float32x4x3_t test_vld3q_f32(float32_t const * a) {
6058 return vld3q_f32(a);
6059 }
6060
6061 // CHECK-LABEL: define void @test_vld3q_p8(%struct.poly8x16x3_t* noalias sret %agg.result, i8* %a) #0 {
6062 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
6063 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
6064 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
6065 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
6066 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
6067 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* %agg.result to i8*
6068 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
6069 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false)
6070 // CHECK: ret void
6071 poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
6072 return vld3q_p8(a);
6073 }
6074
6075 // CHECK-LABEL: define void @test_vld3q_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a) #0 {
6076 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
6077 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
6078 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6079 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
6080 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6081 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
6082 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8*
6083 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
6084 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
6085 // CHECK: ret void
6086 poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
6087 return vld3q_p16(a);
6088 }
6089
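// vld3_<type>: the 64-bit D-register variants follow the same pattern, with a
// 24-byte, 8-byte-aligned copy out to the sret slot.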
6090 // CHECK-LABEL: define void @test_vld3_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
6091 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
6092 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
6093 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
6094 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
6095 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
6096 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8*
6097 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
6098 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
6099 // CHECK: ret void
6100 uint8x8x3_t test_vld3_u8(uint8_t const * a) {
6101 return vld3_u8(a);
6102 }
6103
6104 // CHECK-LABEL: define void @test_vld3_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
6105 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
6106 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
6107 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6108 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
6109 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6110 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
6111 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
6112 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
6113 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6114 // CHECK: ret void
6115 uint16x4x3_t test_vld3_u16(uint16_t const * a) {
6116 return vld3_u16(a);
6117 }
6118
6119 // CHECK-LABEL: define void @test_vld3_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
6120 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
6121 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
6122 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6123 // CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
6124 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
6125 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
6126 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
6127 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
6128 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6129 // CHECK: ret void
6130 uint32x2x3_t test_vld3_u32(uint32_t const * a) {
6131 return vld3_u32(a);
6132 }
6133
6134 // CHECK-LABEL: define void @test_vld3_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
6135 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
6136 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
6137 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6138 // CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
6139 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
6140 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
6141 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8*
6142 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
6143 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6144 // CHECK: ret void
6145 uint64x1x3_t test_vld3_u64(uint64_t const * a) {
6146 return vld3_u64(a);
6147 }
6148
6149 // CHECK-LABEL: define void @test_vld3_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
6150 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
6151 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
6152 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
6153 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
6154 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
6155 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
6156 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
6157 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
6158 // CHECK: ret void
6159 int8x8x3_t test_vld3_s8(int8_t const * a) {
6160 return vld3_s8(a);
6161 }
6162
6163 // CHECK-LABEL: define void @test_vld3_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
6164 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
6165 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
6166 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6167 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
6168 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6169 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
6170 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
6171 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
6172 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6173 // CHECK: ret void
6174 int16x4x3_t test_vld3_s16(int16_t const * a) {
6175 return vld3_s16(a);
6176 }
6177
6178 // CHECK-LABEL: define void @test_vld3_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
6179 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
6180 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
6181 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6182 // CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
6183 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
6184 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
6185 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
6186 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
6187 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6188 // CHECK: ret void
6189 int32x2x3_t test_vld3_s32(int32_t const * a) {
6190 return vld3_s32(a);
6191 }
6192
6193 // CHECK-LABEL: define void @test_vld3_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
6194 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
6195 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
6196 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6197 // CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
6198 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
6199 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
6200 // CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8*
6201 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
6202 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6203 // CHECK: ret void
6204 int64x1x3_t test_vld3_s64(int64_t const * a) {
6205 return vld3_s64(a);
6206 }
6207
6208 // CHECK-LABEL: define void @test_vld3_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 {
6209 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
6210 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
6211 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
6212 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
6213 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6214 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
6215 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
6216 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
6217 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6218 // CHECK: ret void
6219 float16x4x3_t test_vld3_f16(float16_t const * a) {
6220 return vld3_f16(a);
6221 }
6222
6223 // CHECK-LABEL: define void @test_vld3_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 {
6224 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
6225 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
6226 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
6227 // CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32.p0i8(i8* [[TMP1]], i32 4)
6228 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
6229 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP2]]
6230 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
6231 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
6232 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6233 // CHECK: ret void
6234 float32x2x3_t test_vld3_f32(float32_t const * a) {
6235 return vld3_f32(a);
6236 }
6237
6238 // CHECK-LABEL: define void @test_vld3_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
6239 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
6240 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
6241 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
6242 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
6243 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
6244 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
6245 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
6246 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
6247 // CHECK: ret void
6248 poly8x8x3_t test_vld3_p8(poly8_t const * a) {
6249 return vld3_p8(a);
6250 }
6251
6252 // CHECK-LABEL: define void @test_vld3_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
6253 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
6254 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
6255 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6256 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
6257 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6258 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
6259 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
6260 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
6261 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6262 // CHECK: ret void
6263 poly16x4x3_t test_vld3_p16(poly16_t const * a) {
6264 return vld3_p16(a);
6265 }
6266
6267
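// vld3_dup_<type>: lowered as a vld3lane load of lane 0 with undef vector
// operands, after which each of the three results is splatted to all lanes by
// a shufflevector with a zero mask. The single-lane 64-bit variants (u64/s64)
// need no splat and use a plain @llvm.arm.neon.vld3 instead.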
6268 // CHECK-LABEL: define void @test_vld3_dup_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
6269 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
6270 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
6271 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
6272 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
6273 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
6274 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
6275 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
6276 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
6277 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
6278 // CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
6279 // CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
6280 // CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
6281 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
6282 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
6283 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8*
6284 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
6285 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
6286 // CHECK: ret void
6287 uint8x8x3_t test_vld3_dup_u8(uint8_t const * a) {
6288 return vld3_dup_u8(a);
6289 }
6290
6291 // CHECK-LABEL: define void @test_vld3_dup_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
6292 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
6293 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
6294 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6295 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
6296 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
6297 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
6298 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
6299 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
6300 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
6301 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
6302 // CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
6303 // CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
6304 // CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
6305 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6306 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
6307 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
6308 // CHECK: [[TMP10:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
6309 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6310 // CHECK: ret void
6311 uint16x4x3_t test_vld3_dup_u16(uint16_t const * a) {
6312 return vld3_dup_u16(a);
6313 }
6314
6315 // CHECK-LABEL: define void @test_vld3_dup_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
6316 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
6317 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
6318 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6319 // CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
6320 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
6321 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
6322 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
6323 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
6324 // CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
6325 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
6326 // CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
6327 // CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
6328 // CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
6329 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
6330 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]]
6331 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
6332 // CHECK: [[TMP10:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
6333 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6334 // CHECK: ret void
6335 uint32x2x3_t test_vld3_dup_u32(uint32_t const * a) {
6336 return vld3_dup_u32(a);
6337 }
6338
6339 // CHECK-LABEL: define void @test_vld3_dup_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
6340 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
6341 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
6342 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6343 // CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
6344 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
6345 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
6346 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8*
6347 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
6348 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6349 // CHECK: ret void
6350 uint64x1x3_t test_vld3_dup_u64(uint64_t const * a) {
6351 return vld3_dup_u64(a);
6352 }
6353
6354 // CHECK-LABEL: define void @test_vld3_dup_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
6355 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
6356 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
6357 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
6358 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
6359 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
6360 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
6361 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
6362 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
6363 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
6364 // CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
6365 // CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
6366 // CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
6367 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
6368 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
6369 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
6370 // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
6371 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
6372 // CHECK: ret void
6373 int8x8x3_t test_vld3_dup_s8(int8_t const * a) {
6374 return vld3_dup_s8(a);
6375 }
6376
6377 // CHECK-LABEL: define void @test_vld3_dup_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
6378 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
6379 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
6380 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6381 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
6382 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
6383 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
6384 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
6385 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
6386 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
6387 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
6388 // CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
6389 // CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
6390 // CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
6391 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6392 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
6393 // CHECK: [[TMP9:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
6394 // CHECK: [[TMP10:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
6395 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6396 // CHECK: ret void
6397 int16x4x3_t test_vld3_dup_s16(int16_t const * a) {
6398 return vld3_dup_s16(a);
6399 }
6400
6401 // CHECK-LABEL: define void @test_vld3_dup_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
6402 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
6403 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
6404 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6405 // CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
6406 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
6407 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
6408 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
6409 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
6410 // CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
6411 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
6412 // CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
6413 // CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
6414 // CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
6415 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
6416 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]]
6417 // CHECK: [[TMP9:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
6418 // CHECK: [[TMP10:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
6419 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6420 // CHECK: ret void
6421 int32x2x3_t test_vld3_dup_s32(int32_t const * a) {
6422 return vld3_dup_s32(a);
6423 }
6424
6425 // CHECK-LABEL: define void @test_vld3_dup_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
6426 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
6427 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
6428 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6429 // CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
6430 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
6431 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
6432 // CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8*
6433 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
6434 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
6435 // CHECK: ret void
6436 int64x1x3_t test_vld3_dup_s64(int64_t const * a) {
6437 return vld3_dup_s64(a);
6438 }
6439
6440 // CHECK-LABEL: define void @test_vld3_dup_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 {
6441 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
6442 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
6443 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
6444 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
6445 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
6446 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
6447 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
6448 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
6449 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
6450 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
6451 // CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
6452 // CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
6453 // CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
6454 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6455 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
6456 // CHECK: [[TMP9:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
6457 // CHECK: [[TMP10:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
6458 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6459 // CHECK: ret void
6460 float16x4x3_t test_vld3_dup_f16(float16_t const * a) {
6461 return vld3_dup_f16(a);
6462 }
6463
6464 // CHECK-LABEL: define void @test_vld3_dup_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 {
6465 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
6466 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
6467 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
6468 // CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
6469 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0
6470 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
6471 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
6472 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1
6473 // CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
6474 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
6475 // CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2
6476 // CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
6477 // CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2
6478 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
6479 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[TMP7]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP8]]
6480 // CHECK: [[TMP9:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
6481 // CHECK: [[TMP10:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
6482 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6483 // CHECK: ret void
6484 float32x2x3_t test_vld3_dup_f32(float32_t const * a) {
6485 return vld3_dup_f32(a);
6486 }
6487
6488 // CHECK-LABEL: define void @test_vld3_dup_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
6489 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
6490 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
6491 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
6492 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
6493 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
6494 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
6495 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
6496 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
6497 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
6498 // CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
6499 // CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
6500 // CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
6501 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
6502 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
6503 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
6504 // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
6505 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
6506 // CHECK: ret void
6507 poly8x8x3_t test_vld3_dup_p8(poly8_t const * a) {
6508 return vld3_dup_p8(a);
6509 }
6510
6511 // CHECK-LABEL: define void @test_vld3_dup_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
6512 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
6513 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
6514 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6515 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
6516 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
6517 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
6518 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
6519 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
6520 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
6521 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
6522 // CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
6523 // CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
6524 // CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
6525 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
6526 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
6527 // CHECK: [[TMP9:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
6528 // CHECK: [[TMP10:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
6529 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
6530 // CHECK: ret void
6531 poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) {
6532 return vld3_dup_p16(a);
6533 }
6534
6535
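// vld3q_lane_<type>: the 3-vector aggregate argument arrives coerced as
// [6 x i64]; it is stored to a local, memcpy'd to a shadow copy, and each
// member vector is reloaded and bitcast through <16 x i8> before the
// @llvm.arm.neon.vld3lane call, whose trailing i32 arguments are the lane
// index and the alignment.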
6536 // CHECK-LABEL: define void @test_vld3q_lane_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
6537 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
6538 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
6539 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
6540 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
6541 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
6542 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6543 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
6544 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
6545 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6546 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
6547 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6548 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
6549 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
6550 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6551 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6552 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
6553 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6554 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6555 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6556 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
6557 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6558 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6559 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6560 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6561 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6562 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6563 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
6564 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6565 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
6566 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8*
6567 // CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
6568 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6569 // CHECK: ret void
6570 uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
6571 return vld3q_lane_u16(a, b, 7);
6572 }
6573
6574 // CHECK-LABEL: define void @test_vld3q_lane_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 {
6575 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
6576 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
6577 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
6578 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
6579 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
6580 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6581 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
6582 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
6583 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6584 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
6585 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6586 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
6587 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
6588 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6589 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6590 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
6591 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6592 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6593 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6594 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
6595 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6596 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6597 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6598 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6599 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6600 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6601 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4)
6602 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
6603 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]]
6604 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8*
6605 // CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
6606 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6607 // CHECK: ret void
test_vld3q_lane_u32(uint32_t const * a,uint32x4x3_t b)6608 uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
6609 return vld3q_lane_u32(a, b, 3);
6610 }
6611
6612 // CHECK-LABEL: define void @test_vld3q_lane_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
6613 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
6614 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
6615 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
6616 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
6617 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
6618 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6619 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
6620 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
6621 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6622 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
6623 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6624 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
6625 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
6626 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6627 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6628 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
6629 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6630 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6631 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6632 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
6633 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6634 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6635 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6636 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6637 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6638 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6639 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
6640 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6641 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
6642 // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8*
6643 // CHECK: [[TMP16:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
6644 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6645 // CHECK: ret void
test_vld3q_lane_s16(int16_t const * a,int16x8x3_t b)6646 int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
6647 return vld3q_lane_s16(a, b, 7);
6648 }
6649
6650 // CHECK-LABEL: define void @test_vld3q_lane_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 {
6651 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
6652 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
6653 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
6654 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
6655 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
6656 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6657 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
6658 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
6659 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6660 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
6661 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6662 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
6663 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
6664 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6665 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6666 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
6667 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6668 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6669 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6670 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
6671 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6672 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6673 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6674 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6675 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6676 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6677 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4)
6678 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
6679 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]]
6680 // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8*
6681 // CHECK: [[TMP16:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
6682 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6683 // CHECK: ret void
test_vld3q_lane_s32(int32_t const * a,int32x4x3_t b)6684 int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
6685 return vld3q_lane_s32(a, b, 3);
6686 }
6687
6688 // CHECK-LABEL: define void @test_vld3q_lane_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a, [6 x i64] %b.coerce) #0 {
6689 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
6690 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
6691 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
6692 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
6693 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
6694 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6695 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
6696 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
6697 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6698 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
6699 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
6700 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
6701 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
6702 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
6703 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
6704 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
6705 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
6706 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
6707 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
6708 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
6709 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
6710 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
6711 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
6712 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6713 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6714 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6715 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
6716 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6717 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
6718 // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8*
6719 // CHECK: [[TMP16:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
6720 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6721 // CHECK: ret void
test_vld3q_lane_f16(float16_t const * a,float16x8x3_t b)6722 float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
6723 return vld3q_lane_f16(a, b, 7);
6724 }
6725
6726 // CHECK-LABEL: define void @test_vld3q_lane_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a, [6 x i64] %b.coerce) #0 {
6727 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
6728 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
6729 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
6730 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
6731 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
6732 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6733 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
6734 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
6735 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6736 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
6737 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
6738 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
6739 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
6740 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
6741 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
6742 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
6743 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
6744 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
6745 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
6746 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
6747 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
6748 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
6749 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
6750 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
6751 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
6752 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
6753 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], i32 3, i32 4)
6754 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float> }*
6755 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP14]]
6756 // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8*
6757 // CHECK: [[TMP16:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
6758 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6759 // CHECK: ret void
test_vld3q_lane_f32(float32_t const * a,float32x4x3_t b)6760 float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
6761 return vld3q_lane_f32(a, b, 3);
6762 }
6763
6764 // CHECK-LABEL: define void @test_vld3q_lane_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
6765 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
6766 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
6767 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
6768 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
6769 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
6770 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
6771 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
6772 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
6773 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
6774 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
6775 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6776 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
6777 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
6778 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6779 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6780 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
6781 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6782 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6783 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6784 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
6785 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6786 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6787 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6788 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6789 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6790 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6791 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
6792 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
6793 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
6794 // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8*
6795 // CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
6796 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
6797 // CHECK: ret void
test_vld3q_lane_p16(poly16_t const * a,poly16x8x3_t b)6798 poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
6799 return vld3q_lane_p16(a, b, 7);
6800 }
6801
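// The vld3_lane tests cover the same single-lane VLD3 on 64-bit d registers.
// The IR has the same shape as the q-register forms, only with <8 x i8>-based
// bitcasts, a [3 x i64] argument coercion, and 8-byte alignment; the legal
// lane index shrinks with the element count (0-7 for 8-bit elements, 0-3 for
// 16-bit, 0-1 for 32-bit).
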
// CHECK-LABEL: define void @test_vld3_lane_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  return vld3_lane_u8(a, b, 7);
}

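// For the 8-bit variant above, the pointer is already i8* and the member
// vectors are already <8 x i8>, so no pointer or vector bitcasts are needed;
// the 16- and 32-bit variants below round-trip each member through <8 x i8>
// before the intrinsic call.
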
// CHECK-LABEL: define void @test_vld3_lane_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld3_lane_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  return vld3_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3_lane_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld3_lane_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld3_lane_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  return vld3_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3_lane_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}


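// The vld4q tests cover VLD4 (multiple 4-element structures): a single
// @llvm.arm.neon.vld4 call fills four q registers with deinterleaved data,
// and the 64-byte result struct is returned indirectly through the sret
// pointer. Illustrative usage only, not part of the FileCheck-verified code
// (`rgba` and `px` are hypothetical names):
//
//   uint8x16x4_t px = vld4q_u8(rgba); // px.val[0]=R, [1]=G, [2]=B, [3]=A
//   uint8x16_t reds = px.val[0];      // 16 deinterleaved red samples
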
// CHECK-LABEL: define void @test_vld4q_u8(%struct.uint8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: define void @test_vld4q_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: define void @test_vld4q_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: define void @test_vld4q_s8(%struct.int8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: define void @test_vld4q_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: define void @test_vld4q_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}

7250 // CHECK-LABEL: define void @test_vld4q_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a) #0 {
7251 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
7252 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
7253 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
7254 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
7255 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
7256 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
7257 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8*
7258 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
7259 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
7260 // CHECK: ret void
test_vld4q_f16(float16_t const * a)7261 float16x8x4_t test_vld4q_f16(float16_t const * a) {
7262 return vld4q_f16(a);
7263 }
7264
7265 // CHECK-LABEL: define void @test_vld4q_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a) #0 {
7266 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
7267 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
7268 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
7269 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8* [[TMP1]], i32 4)
7270 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
7271 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP2]]
7272 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8*
7273 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
7274 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
7275 // CHECK: ret void
test_vld4q_f32(float32_t const * a)7276 float32x4x4_t test_vld4q_f32(float32_t const * a) {
7277 return vld4q_f32(a);
7278 }
7279
7280 // CHECK-LABEL: define void @test_vld4q_p8(%struct.poly8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
7281 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
7282 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
7283 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
7284 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
7285 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
7286 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* %agg.result to i8*
7287 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
7288 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
7289 // CHECK: ret void
test_vld4q_p8(poly8_t const * a)7290 poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
7291 return vld4q_p8(a);
7292 }
7293
7294 // CHECK-LABEL: define void @test_vld4q_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
7295 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
7296 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
7297 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
7298 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
7299 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
7300 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
7301 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8*
7302 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
7303 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
7304 // CHECK: ret void
test_vld4q_p16(poly16_t const * a)7305 poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
7306 return vld4q_p16(a);
7307 }
7308
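// The vld4q tests above all follow the same shape: the quad-register result
// (a 64-byte, 16-byte-aligned struct of four Q-register vectors) is returned
// indirectly through the sret pointer, so each test checks an alloca for the
// local __ret, the @llvm.arm.neon.vld4.* call, a store of the returned
// aggregate, and a closing memcpy into %agg.result.
//
// Illustrative usage sketch (not part of the checked output; the buffer name
// is hypothetical). vld4q de-interleaves groups of four elements:
//   uint32_t buf[16];
//   uint32x4x4_t v = vld4q_u32(buf);
//   // v.val[0] = { buf[0], buf[4], buf[8], buf[12] }, and so on.
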
// CHECK-LABEL: define void @test_vld4_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}

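// The non-q (D-register) variants below differ from the vld4q tests only in
// vector width: the result struct holds four 8-byte vectors, so the alloca
// is 8-byte aligned and the closing memcpy copies 32 bytes instead of 64.
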
// CHECK-LABEL: define void @test_vld4_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}

// CHECK-LABEL: define void @test_vld4_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}

// CHECK-LABEL: define void @test_vld4_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}

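// Note the trailing alignment operand of the intrinsic call: it is i32 4
// even for the 64-bit element case above, which is consistent with the
// apcs-gnu target ABI selected on the RUN line, where 64-bit types are
// only 4-byte aligned.
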
// CHECK-LABEL: define void @test_vld4_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}

// CHECK-LABEL: define void @test_vld4_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}

// CHECK-LABEL: define void @test_vld4_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}

// CHECK-LABEL: define void @test_vld4_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}

// CHECK-LABEL: define void @test_vld4_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}

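// As the f16 checks above show, the loaded half data carries no half vector
// types in the generated IR at this point: each member of float16x4x4_t is
// loaded and stored as a <4 x i16>, and only the struct type name records
// the element type.
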
// CHECK-LABEL: define void @test_vld4_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}

// CHECK-LABEL: define void @test_vld4_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}

// CHECK-LABEL: define void @test_vld4_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}


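// The vld4_dup tests follow. There is no dedicated dup intrinsic in this IR:
// the checks below match a single @llvm.arm.neon.vld4lane.* load into lane 0
// with undef vector inputs, followed by one extractvalue/shufflevector/
// insertvalue triple per struct member that splats lane 0 across the whole
// vector (the <N x i32> zeroinitializer shuffle mask).
//
// Illustrative sketch of the per-member splat, in pseudo-IR:
//   %m = extractvalue {...} %vld_dup, i          ; member i, only lane 0 valid
//   %s = shufflevector %m, %m, zeroinitializer   ; broadcast lane 0
//   %r = insertvalue {...} %acc, %s, i           ; write the splat back
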
// CHECK-LABEL: define void @test_vld4_dup_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_dup_u8(uint8_t const * a) {
  return vld4_dup_u8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_dup_u16(uint16_t const * a) {
  return vld4_dup_u16(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_dup_u32(uint32_t const * a) {
  return vld4_dup_u32(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint64x1x4_t test_vld4_dup_u64(uint64_t const * a) {
  return vld4_dup_u64(a);
}

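// For the 1 x i64 case above, the lane-load-and-splat sequence is
// unnecessary: a one-lane vector is its own broadcast, so the dup form
// lowers to the same plain @llvm.arm.neon.vld4.v1i64 call as vld4_u64.
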
// CHECK-LABEL: define void @test_vld4_dup_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_dup_s8(int8_t const * a) {
  return vld4_dup_s8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_dup_s16(int16_t const * a) {
  return vld4_dup_s16(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_dup_s32(int32_t const * a) {
  return vld4_dup_s32(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int64x1x4_t test_vld4_dup_s64(int64_t const * a) {
  return vld4_dup_s64(a);
}

// CHECK-LABEL: define void @test_vld4_dup_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_dup_f16(float16_t const * a) {
  return vld4_dup_f16(a);
}

// CHECK-LABEL: define void @test_vld4_dup_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP8]], <2 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], <2 x float> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP9]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_dup_f32(float32_t const * a) {
  return vld4_dup_f32(a);
}

// CHECK-LABEL: define void @test_vld4_dup_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_dup_p8(poly8_t const * a) {
  return vld4_dup_p8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) {
  return vld4_dup_p16(a);
}


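// The vld4q_lane tests follow. The by-value vector-array argument is coerced
// to [8 x i64] by the ABI, so each test first checks the store of %b.coerce
// and a memcpy into the local __s1 copy, then four load/bitcast pairs that
// feed the existing struct members into @llvm.arm.neon.vld4lane.*, whose
// last two operands are the lane index and the alignment.
//
// Illustrative usage sketch (not part of the checked output; the pointer
// name is hypothetical). vld4q_lane reloads only one lane of each member:
//   uint16x8x4_t v = vld4q_lane_u16(p, v, 7);  // refill lane 7 of val[0..3]
// All other lanes keep their values from the incoming struct.
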
// CHECK-LABEL: define void @test_vld4q_lane_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
  return vld4q_lane_u16(a, b, 7);
}

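// The lane operand is a constant immediate and must be in range for the
// element count: 7 is the highest lane of an 8 x i16 vector here, and the
// u32/s32 tests below use lane 3 for their 4 x i32 members.
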
7828 // CHECK-LABEL: define void @test_vld4q_lane_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 {
7829 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
7830 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
7831 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
7832 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
7833 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
7834 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
7835 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
7836 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
7837 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
7838 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
7839 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
7840 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
7841 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
7842 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
7843 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
7844 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
7845 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
7846 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
7847 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
7848 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
7849 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
7850 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
7851 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
7852 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
7853 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
7854 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
7855 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
7856 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
7857 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
7858 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
7859 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
7860 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4)
7861 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
7862 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]]
7863 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8*
7864 // CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
7865 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
7866 // CHECK: ret void
test_vld4q_lane_u32(uint32_t const * a,uint32x4x4_t b)7867 uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
7868 return vld4q_lane_u32(a, b, 3);
7869 }
7870
7871 // CHECK-LABEL: define void @test_vld4q_lane_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 {
7872 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
7873 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
7874 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
7875 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
7876 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
7877 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
7878 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
7879 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
7880 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
7881 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
7882 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
7883 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
7884 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
7885 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
7886 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
7887 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
7888 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
7889 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
7890 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
7891 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
7892 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
7893 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
7894 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
7895 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
7896 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
7897 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
7898 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
7899 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
7900 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
7901 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
7902 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
7903 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
7904 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
7905 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
7906 // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8*
7907 // CHECK: [[TMP19:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
7908 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
7909 // CHECK: ret void
test_vld4q_lane_s16(int16_t const * a,int16x8x4_t b)7910 int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
7911 return vld4q_lane_s16(a, b, 7);
7912 }

// CHECK-LABEL: define void @test_vld4q_lane_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
  return vld4q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4q_lane_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
  return vld4q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4q_lane_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], <4 x float> [[TMP16]], i32 3, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
  return vld4q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4q_lane_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
  return vld4q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4_lane_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld4_lane_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4_lane_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld4_lane_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4_lane_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x float> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld4_lane_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}
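
// Illustrative, unchecked sketch of the d-register pattern exercised above:
// each vld4_lane variant deinterleaves four consecutive elements into one
// lane of four 64-bit vectors (here lane 3, which the checked IR performs via
// @llvm.arm.neon.vld4lane.v4i16 with 2-byte alignment). The helper name is
// hypothetical.
uint16x4x4_t reload_lane3_u16(uint16_t const * p, uint16x4x4_t acc) {
  // Writes p[0..3] into lane 3 of acc.val[0..3]; other lanes are preserved.
  return vld4_lane_u16(p, acc, 3);
}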


// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMAX_V_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMAX_V_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> [[VMAX_V_I]], <2 x float> [[VMAX_V1_I]]) #4
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
8568
8569 // CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
8570 // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b) #4
8571 // CHECK: ret <16 x i8> [[VMAXQ_V_I]]
test_vmaxq_s8(int8x16_t a,int8x16_t b)8572 int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
8573 return vmaxq_s8(a, b);
8574 }
8575
8576 // CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
8577 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8578 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8579 // CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8580 // CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
8581 // CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4
8582 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
8583 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
8584 // CHECK: ret <8 x i16> [[TMP2]]
test_vmaxq_s16(int16x8_t a,int16x8_t b)8585 int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
8586 return vmaxq_s16(a, b);
8587 }
8588
8589 // CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
8590 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8591 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
8592 // CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
8593 // CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
8594 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4
8595 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
8596 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
8597 // CHECK: ret <4 x i32> [[TMP2]]
test_vmaxq_s32(int32x4_t a,int32x4_t b)8598 int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
8599 return vmaxq_s32(a, b);
8600 }
8601
8602 // CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
8603 // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
8604 // CHECK: ret <16 x i8> [[VMAXQ_V_I]]
test_vmaxq_u8(uint8x16_t a,uint8x16_t b)8605 uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
8606 return vmaxq_u8(a, b);
8607 }
8608
8609 // CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
8610 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8611 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8612 // CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8613 // CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
8614 // CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4
8615 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
8616 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
8617 // CHECK: ret <8 x i16> [[TMP2]]
test_vmaxq_u16(uint16x8_t a,uint16x8_t b)8618 uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
8619 return vmaxq_u16(a, b);
8620 }
8621
8622 // CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
8623 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8624 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
8625 // CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
8626 // CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
8627 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4
8628 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
8629 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
8630 // CHECK: ret <4 x i32> [[TMP2]]
test_vmaxq_u32(uint32x4_t a,uint32x4_t b)8631 uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
8632 return vmaxq_u32(a, b);
8633 }
8634
8635 // CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
8636 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
8637 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
8638 // CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
8639 // CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
8640 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> [[VMAXQ_V_I]], <4 x float> [[VMAXQ_V1_I]]) #4
8641 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
8642 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x float>
8643 // CHECK: ret <4 x float> [[TMP2]]
test_vmaxq_f32(float32x4_t a,float32x4_t b)8644 float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
8645 return vmaxq_f32(a, b);
8646 }
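
// Editorial sketch, not exercised by FileCheck: every vmax/vmaxq variant
// above computes a per-lane maximum; only the element type selects between
// the vmaxs (signed), vmaxu (unsigned), and float intrinsics. A scalar
// reference for one lane (the helper name is ours, for illustration only):
static inline uint32_t vmax_lane_ref(uint32_t x, uint32_t y) {
  return x > y ? x : y; // lane i of vmax_u32(a, b) equals vmax_lane_ref(a[i], b[i])
}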


// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_V_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_V_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> [[VMIN_V_I]], <2 x float> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> [[VMINQ_V_I]], <4 x float> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}
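
// Editorial sketch, not exercised by FileCheck: vmin mirrors vmax with the
// smaller lane value, so the two compose into a per-lane clamp. The helper
// name below is ours, chosen for illustration only.
static inline float32x2_t clamp_f32_sketch(float32x2_t v, float32x2_t lo,
                                           float32x2_t hi) {
  return vmin_f32(vmax_f32(v, lo), hi); // raise to lo, then cap at hi, per lane
}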


// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}
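
// Editorial sketch, not exercised by FileCheck: unlike vmax/vmin, the
// non-widening multiply-accumulate needs no target intrinsic; Clang lowers
// vmla(a, b, c) to a plain IR mul followed by add, as the checks above show.
// The same result written with Clang's vector operators (helper name is ours):
static inline int32x4_t vmlaq_s32_ref(int32x4_t a, int32x4_t b, int32x4_t c) {
  return a + b * c; // element-wise multiply, then element-wise add
}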


// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
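
// Editorial sketch, not exercised by FileCheck: vmlal widens before it
// accumulates, so only the b*c product goes through the vmulls/vmullu
// intrinsic and the add happens at the doubled element width. The same
// result phrased with other real NEON intrinsics (helper name is ours):
static inline int32x4_t vmlal_s16_ref(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vaddq_s32(a, vmull_s16(b, c)); // widening multiply, then 32-bit add
}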


// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}
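
// Editorial sketch, not exercised by FileCheck: the _lane forms first
// broadcast one element of c with a shufflevector (the splat masks in the
// checks above), then feed the ordinary widening multiply-accumulate.
// Spelled out with other real NEON intrinsics (helper name is ours):
static inline int32x4_t vmlal_lane_s16_ref(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, vdup_lane_s16(c, 3)); // splat lane 3 of c, then vmlal
}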


// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}
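
// Editorial sketch, not exercised by FileCheck: the _n forms splat a scalar
// instead of a lane, which is why the IR above builds the operand with an
// insertelement chain rather than a shufflevector. Equivalent formulation
// with other real NEON intrinsics (helper name is ours):
static inline uint64x2_t vmlal_n_u32_ref(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_u32(a, b, vdup_n_u32(c)); // broadcast c to both lanes, then vmlal
}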


// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}
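
// Editorial sketch, not exercised by FileCheck: the non-widening _lane forms
// follow the same splat-then-operate pattern, only with a plain mul/add after
// the shuffle. Note the q-form can take a lane from a 64-bit vector while
// operating on 128-bit data. Written with other real NEON intrinsics (helper
// name is ours):
static inline float32x4_t vmlaq_lane_f32_ref(float32x4_t a, float32x4_t b,
                                             float32x2_t c) {
  return vmlaq_f32(a, b, vdupq_lane_f32(c, 1)); // splat lane 1 of c across 4 lanes
}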


// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}
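
// Editorial sketch, not exercised by FileCheck: for the float _n forms the
// checks above show a separate fmul and fadd, not a fused multiply-add, so
// the per-lane reference is two distinct roundings (helper name is ours):
static inline float vmla_n_f32_lane_ref(float a, float b, float c) {
  return a + b * c; // each lane accumulates b[i] * c into a[i]
}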


// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_u32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlsq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlsq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlsq_s32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlsq_f32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsq_u32(a, b, c);
}
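
// Editorial sketch, not exercised by FileCheck: vmls is vmla with the final
// add swapped for a subtraction, i.e. a - b * c, never b * c - a. A scalar
// reference for one lane, with the same modular wrap as the vector ops
// (helper name is ours):
static inline int16_t vmls_lane_ref(int16_t a, int16_t b, int16_t c) {
  return (int16_t)(a - b * c); // subtract the product from the accumulator
}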


// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
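
// Editorial sketch, not exercised by FileCheck: vmlsl reuses the same
// vmulls/vmullu widening product as vmlal and only swaps the final add for
// a sub. Spelled out with other real NEON intrinsics (helper name is ours):
static inline uint32x4_t vmlsl_u16_ref(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vsubq_u32(a, vmull_u16(b, c)); // widening multiply, then 32-bit sub
}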


// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[SUB]]
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[SUB]]
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_lane_u32(a, b, c, 1);
}
9543
9544
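// The vmlsl_n tests take the multiplier as a plain scalar; the intrinsic
// splats it across every lane (the insertelement chains below) before the
// same widening multiply-subtract: out[i] = a[i] - (wide)b[i] * (wide)c.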
// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlsl_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlsl_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlsl_n_u32(a, b, c);
}

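// The vmls_lane/vmlsq_lane tests cover the non-widening multiply-subtract:
// operands and result share one element type, so the IR is a plain mul/sub
// (fmul/fsub for float) on the broadcast lane:
//   out[i] = a[i] - b[i] * c[LANE];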
// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[SUB]]
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[SUB]]
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[SUB]]
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[SUB]]
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[SUB]]
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlsq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlsq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[SUB]]
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlsq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlsq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlsq_lane_f32(a, b, c, 1);
}

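// The vmls_n/vmlsq_n tests splat a scalar multiplier and reuse the same
// non-widening mul/sub (fmul/fsub) pattern: out[i] = a[i] - b[i] * c.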
// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmls_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmls_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmls_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmls_n_u32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlsq_n_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlsq_n_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlsq_n_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlsq_n_u32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}

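// The vmovl tests check the lengthening moves: each lane is sign-extended
// (signed variants) or zero-extended (unsigned variants) to twice its width,
// which lowers to a single sext or zext in IR.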
// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

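// The vmovn tests check the narrowing moves: each lane is truncated to half
// its width, keeping the low bits, which lowers to a single trunc in IR.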
// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VMOVN_I]]
int8x8_t test_vmovn_s16(int16x8_t a) {
  return vmovn_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VMOVN_I]]
int16x4_t test_vmovn_s32(int32x4_t a) {
  return vmovn_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VMOVN_I]]
int32x2_t test_vmovn_s64(int64x2_t a) {
  return vmovn_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VMOVN_I]]
uint8x8_t test_vmovn_u16(uint16x8_t a) {
  return vmovn_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VMOVN_I]]
uint16x4_t test_vmovn_u32(uint32x4_t a) {
  return vmovn_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VMOVN_I]]
uint32x2_t test_vmovn_u64(uint64x2_t a) {
  return vmovn_u64(a);
}

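// The vmov_n/vmovq_n tests duplicate one scalar across all lanes via a chain
// of insertelement instructions: out[i] = a for every i. The f16 variants
// take a pointer and load the half value rather than passing it directly.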
// CHECK-LABEL: define <8 x i8> @test_vmov_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vmov_n_u8(uint8_t a) {
  return vmov_n_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmov_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vmov_n_u16(uint16_t a) {
  return vmov_n_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmov_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vmov_n_u32(uint32_t a) {
  return vmov_n_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmov_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vmov_n_s8(int8_t a) {
  return vmov_n_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmov_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vmov_n_s16(int16_t a) {
  return vmov_n_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmov_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vmov_n_s32(int32_t a) {
  return vmov_n_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmov_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vmov_n_p8(poly8_t a) {
  return vmov_n_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmov_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vmov_n_p16(poly16_t a) {
  return vmov_n_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vmov_n_f16(float16_t *a) {
  return vmov_n_f16(*a);
}

// CHECK-LABEL: define <2 x float> @test_vmov_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vmov_n_f32(float32_t a) {
  return vmov_n_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmovq_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vmovq_n_u8(uint8_t a) {
  return vmovq_n_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovq_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vmovq_n_u16(uint16_t a) {
  return vmovq_n_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovq_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vmovq_n_u32(uint32_t a) {
  return vmovq_n_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmovq_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vmovq_n_s8(int8_t a) {
  return vmovq_n_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovq_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vmovq_n_s16(int16_t a) {
  return vmovq_n_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovq_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vmovq_n_s32(int32_t a) {
  return vmovq_n_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmovq_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vmovq_n_p8(poly8_t a) {
  return vmovq_n_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovq_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vmovq_n_p16(poly16_t a) {
  return vmovq_n_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vmovq_n_f16(float16_t *a) {
  return vmovq_n_f16(*a);
}

// CHECK-LABEL: define <4 x float> @test_vmovq_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vmovq_n_f32(float32_t a) {
  return vmovq_n_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vmov_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vmov_n_s64(int64_t a) {
  int64x1_t tmp = vmov_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: define <1 x i64> @test_vmov_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vmov_n_u64(uint64_t a) {
  uint64x1_t tmp = vmov_n_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: define <2 x i64> @test_vmovq_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
int64x2_t test_vmovq_n_s64(int64_t a) {
  return vmovq_n_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovq_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
uint64x2_t test_vmovq_n_u64(uint64_t a) {
  return vmovq_n_u64(a);
}

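// The vmul/vmulq tests cover element-wise multiply, which lowers directly to
// the IR mul (integer, truncating to the lane width) or fmul (float).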
// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
  return vmul_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
  return vmul_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
  return vmul_s32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
  return vmul_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
  return vmul_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
  return vmulq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
  return vmulq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
  return vmulq_s32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
  return vmulq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
  return vmulq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
  return vmulq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
  return vmulq_u32(a, b);
}

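// The vmull tests cover the widening multiply: inputs are conceptually
// widened to twice their lane width before multiplying, so no product bits
// are lost: out[i] = (wide)a[i] * (wide)b[i]. This maps to the
// vmulls/vmullu intrinsics, and vmullp for the polynomial variant.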
// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

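// The vmull_lane tests combine the widening multiply with a lane broadcast
// of the second operand: out[i] = (wide)a[i] * (wide)b[LANE].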
// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_lane_u32(a, b, 1);
}

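// The vmull_n tests splat a scalar multiplier first, then perform the same
// widening multiply: out[i] = (wide)a[i] * (wide)b.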
// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4
// CHECK: ret <4 x i32> [[VMULL5_I]]
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
  return vmull_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4
// CHECK: ret <2 x i64> [[VMULL3_I]]
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
  return vmull_n_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 zeroext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4
// CHECK: ret <4 x i32> [[VMULL5_I]]
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
  return vmull_n_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4
// CHECK: ret <2 x i64> [[VMULL3_I]]
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
  return vmull_n_u32(a, b);
}


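// vmul_p8/vmulq_p8: polynomial (carry-less) multiply of poly8 vectors,
// lowered to the dedicated vmulp intrinsic.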
// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
  return vmul_p8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
  return vmulq_p8(a, b);
}


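// vmul_lane_*: multiply by one lane of the second operand. The lane is
// broadcast with shufflevector and the multiply itself is a plain mul/fmul.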
// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
  return vmul_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
  return vmul_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
// CHECK: ret <2 x float> [[MUL]]
float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
  return vmul_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
  return vmulq_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
  return vmulq_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
// CHECK: ret <4 x float> [[MUL]]
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
  return vmulq_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
  return vmulq_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
  return vmulq_lane_u32(a, b, 1);
}


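// vmul_n_*: multiply by a scalar. The scalar is splatted with insertelement;
// like vmul_lane, this needs no target intrinsic.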
// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
  return vmul_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
  return vmul_n_s32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
  return vmul_n_f32(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 zeroext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
  return vmul_n_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
  return vmul_n_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 signext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
  return vmulq_n_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
  return vmulq_n_s32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
  return vmulq_n_f32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 zeroext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
  return vmulq_n_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
  return vmulq_n_u32(a, b);
}


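// vmvn_*: bitwise NOT, lowered to xor with an all-ones vector.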
// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
int8x8_t test_vmvn_s8(int8x8_t a) {
  return vmvn_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <4 x i16> [[NEG_I]]
int16x4_t test_vmvn_s16(int16x4_t a) {
  return vmvn_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK: ret <2 x i32> [[NEG_I]]
int32x2_t test_vmvn_s32(int32x2_t a) {
  return vmvn_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
uint8x8_t test_vmvn_u8(uint8x8_t a) {
  return vmvn_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <4 x i16> [[NEG_I]]
uint16x4_t test_vmvn_u16(uint16x4_t a) {
  return vmvn_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK: ret <2 x i32> [[NEG_I]]
uint32x2_t test_vmvn_u32(uint32x2_t a) {
  return vmvn_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
poly8x8_t test_vmvn_p8(poly8x8_t a) {
  return vmvn_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
int8x16_t test_vmvnq_s8(int8x16_t a) {
  return vmvnq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <8 x i16> [[NEG_I]]
int16x8_t test_vmvnq_s16(int16x8_t a) {
  return vmvnq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: ret <4 x i32> [[NEG_I]]
int32x4_t test_vmvnq_s32(int32x4_t a) {
  return vmvnq_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
  return vmvnq_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <8 x i16> [[NEG_I]]
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
  return vmvnq_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: ret <4 x i32> [[NEG_I]]
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
  return vmvnq_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
  return vmvnq_p8(a);
}


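// vneg_*: negation. Integer forms subtract from zero; float forms subtract
// from -0.0, the IR idiom for fneg.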
// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vneg_s8(int8x8_t a) {
  return vneg_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vneg_s16(int16x4_t a) {
  return vneg_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vneg_s32(int32x2_t a) {
  return vneg_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vneg_f32(float32x2_t a) {
  return vneg_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vnegq_s8(int8x16_t a) {
  return vnegq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vnegq_s16(int16x8_t a) {
  return vnegq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vnegq_s32(int32x4_t a) {
  return vnegq_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vnegq_f32(float32x4_t a) {
  return vnegq_f32(a);
}


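// vorn_*: bitwise OR-NOT (a | ~b); the complement is an xor with all-ones
// feeding an or.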
// CHECK-LABEL: define <8 x i8> @test_vorn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
  return vorn_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
  return vorn_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
  return vorn_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorn_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
  return vorn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vorn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
  return vorn_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
  return vorn_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
  return vorn_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorn_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
  return vorn_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
  return vornq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
  return vornq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
  return vornq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
  return vornq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
  return vornq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
  return vornq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
  return vornq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
  return vornq_u64(a, b);
}


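// vorr_*: bitwise OR, lowered to a plain or instruction.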
// CHECK-LABEL: define <8 x i8> @test_vorr_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
  return vorr_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorr_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
  return vorr_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorr_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
  return vorr_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorr_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
  return vorr_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vorr_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
  return vorr_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorr_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
  return vorr_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorr_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
  return vorr_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorr_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
  return vorr_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
  return vorrq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
  return vorrq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
  return vorrq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
  return vorrq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
  return vorrq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
  return vorrq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
  return vorrq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
  return vorrq_u64(a, b);
}


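// vpadal_*: pairwise add and accumulate long. Adjacent pairs of b are
// widened and summed, then added into the wider accumulator a.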
// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4
// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
  return vpadal_s8(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4
// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
  return vpadal_s16(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4
// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
  return vpadal_s32(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4
// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
  return vpadal_u8(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4
// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
  return vpadal_u16(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4
// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
  return vpadal_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}


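// vpadd_*: pairwise add of adjacent elements across the two operands.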
// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}


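// vpaddl_*: pairwise add long. Adjacent pairs are widened and summed,
// halving the lane count.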
// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4
// CHECK: ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4
// CHECK: ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4
// CHECK: ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4
// CHECK: ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4
// CHECK: ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4
// CHECK: ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4
// CHECK: ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4
// CHECK: ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4
// CHECK: ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4
// CHECK: ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4
// CHECK: ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4
// CHECK: ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}


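// vpmax_*: pairwise maximum of adjacent elements across the two operands.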
// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> [[VPMAX_V_I]], <2 x float> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}


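// vpmin_*: pairwise minimum of adjacent elements across the two operands.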
// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> [[VPMIN_V_I]], <2 x float> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

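// Saturating absolute value: vqabs clamps the one case that would otherwise overflow (negating the most negative lane).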
// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #4
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #4
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}

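// Saturating addition: vqadd/vqaddq clamp to the element type's limits instead of wrapping on overflow.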
// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

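// Saturating doubling multiply-accumulate long: vqdmlal lowers to vqdmull followed by a saturating add into the widened accumulator.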
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

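// Lane variants: one multiplicand is a single lane of c, splatted by the shufflevector checked below.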
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_lane_s32(a, b, c, 1);
}

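// Scalar (_n) variants: the scalar operand is broadcast with a chain of insertelement instructions before the widening multiply.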
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}

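// Saturating doubling multiply-subtract long: vqdmlsl mirrors vqdmlal but subtracts via vqsubs.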
// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

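// Lane variants of vqdmlsl: the subtracted product uses a splatted lane of c.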
// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_lane_s32(a, b, c, 1);
}

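// Scalar (_n) variants of vqdmlsl: the scalar c is broadcast before the widening multiply.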
// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlsl_n_s32(a, b, c);
}

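// Saturating doubling multiply high: vqdmulh keeps the high half of 2*a*b, saturating on overflow.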
// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}

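// Lane variants of vqdmulh: the second operand is a splatted lane of b.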
// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqdmulhq_lane_s32(a, b, 1);
}

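// Scalar (_n) variants of vqdmulh: the scalar b is broadcast across all lanes first.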
// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #4
// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqdmulh_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #4
// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqdmulh_n_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #4
// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqdmulhq_n_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #4
// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqdmulhq_n_s32(a, b);
}

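// Saturating doubling multiply long: vqdmull widens the element type and computes saturate(2*a*b).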
// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

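// Lane variants of vqdmull: multiply by one splatted lane of b.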
// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_lane_s32(a, b, 1);
}

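// Scalar (_n) variants of vqdmull: b is broadcast before the widening multiply.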
// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #4
// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
  return vqdmull_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #4
// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
  return vqdmull_n_s32(a, b);
}

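// Saturating narrow: vqmovn halves the element width, clamping values that do not fit the narrower type.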
// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
int8x8_t test_vqmovn_s16(int16x8_t a) {
  return vqmovn_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqmovn_s32(int32x4_t a) {
  return vqmovn_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqmovn_s64(int64x2_t a) {
  return vqmovn_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
  return vqmovn_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
  return vqmovn_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
  return vqmovn_u64(a);
}

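// Saturating narrow, signed to unsigned: vqmovun clamps negative inputs to zero and oversized inputs to the unsigned maximum.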
12332 // CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
12333 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12334 // CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
12335 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #4
12336 // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
test_vqmovun_s16(int16x8_t a)12337 uint8x8_t test_vqmovun_s16(int16x8_t a) {
12338 return vqmovun_s16(a);
12339 }
12340
12341 // CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
12342 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12343 // CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
12344 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #4
12345 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
12346 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
12347 // CHECK: ret <4 x i16> [[TMP1]]
test_vqmovun_s32(int32x4_t a)12348 uint16x4_t test_vqmovun_s32(int32x4_t a) {
12349 return vqmovun_s32(a);
12350 }
12351
12352 // CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
12353 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12354 // CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
12355 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #4
12356 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
12357 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
12358 // CHECK: ret <2 x i32> [[TMP1]]
test_vqmovun_s64(int64x2_t a)12359 uint32x2_t test_vqmovun_s64(int64x2_t a) {
12360 return vqmovun_s64(a);
12361 }
12362
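// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqmovun narrows *signed* input to an *unsigned* result, so negative lanes
// saturate to 0 and lanes above the unsigned maximum saturate to it.
uint8x8_t narrow_s16_to_u8_example(int16x8_t wide) {
  return vqmovun_s16(wide); // -5 -> 0, 700 -> 255, 100 -> 100
}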

// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #4
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #4
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}

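// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqneg negates each lane with saturation, so INT8_MIN (-128) yields
// INT8_MAX (127) instead of wrapping back to -128 the way plain vneg_s8 would.
int8x8_t saturating_negate_example(int8x8_t v) {
  return vqneg_s8(v);
}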

// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}

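// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqrdmulh computes a Q15 fixed-point product, roughly
// (2*a*b + 0x8000) >> 16 per lane with saturation, i.e. the high half of the
// doubled product with round-to-nearest.
int16x4_t q15_mul_example(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}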

// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqrdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqrdmulhq_lane_s32(a, b, 1);
}

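// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// the _lane_ form first broadcasts one element of the second vector (the
// shufflevector in the IR above), then performs the same saturating rounding
// doubling multiply high.
int16x4_t q15_mul_by_lane0_example(int16x4_t a, int16x4_t coeffs) {
  // Multiply every lane of a by coeffs[0].
  return vqrdmulh_lane_s16(a, coeffs, 0);
}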

// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #4
// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #4
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #4
// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #4
// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqrdmulhq_n_s32(a, b);
}

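// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// the _n_ form splats a scalar into every lane (the chain of insertelements
// in the IR above) and is handy for scaling a whole vector by one Q15
// coefficient.
int16x4_t q15_scale_example(int16x4_t a) {
  return vqrdmulh_n_s16(a, 0x4000); // 0x4000 is 0.5 in Q15, so roughly a/2
}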

// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

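// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqrshl takes a *signed* per-lane shift count even for unsigned data;
// positive counts shift left with saturation, negative counts shift right
// with rounding.
uint16x4_t rounding_shift_example(uint16x4_t v) {
  int16x4_t counts = vdup_n_s16(-2); // shift each lane right by 2, rounding
  return vqrshl_u16(v, counts);
}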

// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}

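// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqrshrn_n shifts each lane right by an immediate, rounds to nearest, and
// narrows with saturation -- a common final step when scaling fixed-point
// accumulators back down to bytes.
int8x8_t rescale_example(int16x8_t acc) {
  return vqrshrn_n_s16(acc, 8); // (acc + 128) >> 8, clamped to [-128, 127]
}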

// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}

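// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqrshrun_n is the signed-in, unsigned-out variant: after the rounding right
// shift, lanes are clamped to [0, 255], so any negative result becomes 0.
uint8x8_t rescale_unsigned_example(int16x8_t acc) {
  return vqrshrun_n_s16(acc, 8);
}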

// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

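// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqshl is the non-rounding counterpart of vqrshl above -- negative counts
// perform a truncating right shift instead of rounding to nearest.
int32x2_t truncating_shift_example(int32x2_t v) {
  return vqshl_s32(v, vdup_n_s32(-4)); // arithmetic shift right by 4
}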

// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}

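// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqshlu_n is the mixed-signedness immediate form -- signed input, left shift
// by a constant, result saturated to the *unsigned* range, so negative lanes
// become 0.
uint8x8_t shift_to_unsigned_example(int8x8_t v) {
  return vqshlu_n_s8(v, 2); // -3 -> 0, 40 -> 160, 100 -> 255
}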

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}

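// Illustrative sketch, not part of the FileCheck run (name is ad hoc):
// vqshl_n is the plain immediate form of the saturating left shift; lanes
// that would overflow pin at the type's limits instead of wrapping as they
// would with vshl_n.
int16x4_t saturating_left_shift_example(int16x4_t v) {
  return vqshl_n_s16(v, 3); // 5000 << 3 would be 40000, so it pins at 32767
}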

// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}

13317
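// vqshrun_n narrows a signed input to an unsigned result through the
// vqshiftnsu intrinsic: negative lanes saturate to 0, and lanes above the
// unsigned maximum clamp to it. Illustrative sketch (not checked here):
//   uint8x8_t r = vqshrun_n_s16(vdupq_n_s16(-5), 1); // every lane becomes 0
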
// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}

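// The vqsub tests cover lane-wise saturating subtraction: results clamp to
// the element type's range instead of wrapping. Illustrative sketch (not
// part of the checked output):
//   uint8x8_t u = vqsub_u8(vdup_n_u8(5), vdup_n_u8(10));     // lanes clamp to 0
//   int8x8_t  s = vqsub_s8(vdup_n_s8(-100), vdup_n_s8(100)); // lanes clamp to -128
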
// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

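// vraddhn adds two wide vectors with rounding and keeps the high half of
// each lane; for 16-bit inputs each result lane is
// (int8_t)((a + b + (1 << 7)) >> 8). Illustrative sketch (not checked here):
//   int8x8_t r = vraddhn_s16(vdupq_n_s16(0x0180), vdupq_n_s16(0x0100));
//   // each lane: (0x0180 + 0x0100 + 0x80) >> 8 = 0x03
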
// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

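// vrecpe yields a low-precision (roughly 8-bit) reciprocal estimate; the
// u32 form estimates the reciprocal of a value treated as an unsigned
// fixed-point fraction. On its own the estimate is coarse and is normally
// refined with vrecps (see the sketch before the vrecps tests below).
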
// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #4
// CHECK: ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #4
// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}

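// vrecps computes the Newton-Raphson step 2.0 - a*b; multiplying it into a
// vrecpe estimate refines the reciprocal. Illustrative sketch (not part of
// the checked output):
//   float32x2_t recip = vrecpe_f32(x);             // coarse estimate of 1/x
//   recip = vmul_f32(recip, vrecps_f32(x, recip)); // first refinement
//   recip = vmul_f32(recip, vrecps_f32(x, recip)); // near single precision
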
// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}

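// The vreinterpret tests below only retype the bits: each variant lowers to
// a single IR bitcast, or to no instruction at all when source and result
// already share an IR representation (e.g. u8 <-> s8, both <8 x i8>).
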
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
14409 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_f16(float16x4_t a)14410 float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
14411 return vreinterpret_f32_f16(a);
14412 }
14413
14414 // CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
14415 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
14416 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p8(poly8x8_t a)14417 float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
14418 return vreinterpret_f32_p8(a);
14419 }
14420
14421 // CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
14422 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
14423 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p16(poly16x4_t a)14424 float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
14425 return vreinterpret_f32_p16(a);
14426 }
14427
14428 // CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
14429 // CHECK: ret <8 x i8> %a
test_vreinterpret_p8_s8(int8x8_t a)14430 poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
14431 return vreinterpret_p8_s8(a);
14432 }
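// Note: when the source and destination map to the same IR vector type, as
// int8x8_t and poly8x8_t (both <8 x i8>) do above, the reinterpret is a
// complete no-op and the argument is returned directly with no bitcast.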

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}
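// The q-suffixed tests below walk the same conversion matrix for the
// 128-bit Q-register types; each reinterpret again lowers to at most a
// single bitcast.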

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}
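// A minimal usage sketch (hypothetical helper, not one of the checked
// tests): reinterpreting float lanes as integer lanes lets bitwise
// intrinsics such as veor_u32 operate on them; here the sign bit of each
// float32 lane is flipped. It is static inline so it is never emitted
// into the IR that FileCheck inspects.
static inline float32x2_t sketch_negate_f32(float32x2_t v) {
  uint32x2_t bits = vreinterpret_u32_f32(v);      // same 64 bits, integer view
  bits = veor_u32(bits, vdup_n_u32(0x80000000u)); // XOR each lane's sign bit
  return vreinterpret_f32_u32(bits);              // back to the float view
}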


// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}
15528
15529
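// vrev32 reverses the byte or halfword lanes within each 32-bit word, again
// as a shufflevector with a constant mask.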
// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}


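// vrev64 reverses the lanes within each 64-bit doubleword, covering the byte,
// halfword, word, and float element sizes.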
// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}


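// vrhadd is the rounding halving add: each lane computes (a + b + 1) >> 1
// without intermediate overflow, via the llvm.arm.neon.vrhadds/vrhaddu
// intrinsics. Non-i8 element types are bitcast through <8 x i8>/<16 x i8>.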
// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
  return vrhadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
  return vrhadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
  return vrhadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vrhadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vrhadd_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
  return vrhaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
  return vrhaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
  return vrhaddq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vrhaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vrhaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vrhaddq_u32(a, b);
}


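// vrshl shifts each lane of a by the signed per-lane amount in b; negative
// amounts shift right with rounding. Signed and unsigned variants map to
// llvm.arm.neon.vrshifts and vrshiftu respectively.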
// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}


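// vrshrn_n rounding-shifts right by an immediate and narrows to half-width
// lanes. As the CHECK lines show, the immediate n is encoded as a splat of -n
// in the second operand of llvm.arm.neon.vrshiftn.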
// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 1);
}


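// vrshr_n and vrshrq_n rounding-shift right by an immediate; they reuse the
// vrshifts/vrshiftu intrinsics with a constant splat of -n as the shift vector.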
// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 1);
}


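// vrsqrte produces a per-lane reciprocal square-root estimate (VRSQRTE),
// defined for both float and unsigned 32-bit lanes.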
// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
float32x2_t test_vrsqrte_f32(float32x2_t a) {
  return vrsqrte_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
  return vrsqrteq_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}


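// vrsqrts is the Newton-Raphson reciprocal square-root step (VRSQRTS),
// computing (3 - a*b) / 2 per lane; it is float-only.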
// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
  return vrsqrts_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
  return vrsqrtsq_f32(a, b);
}


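// vrsra_n and vrsraq_n rounding-shift right by an immediate and accumulate:
// the IR is a vrshifts/vrshiftu call on b followed by an add into a.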
16316 // CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
16317 // CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
16318 // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
16319 // CHECK: ret <8 x i8> [[VRSRA_N]]
test_vrsra_n_s8(int8x8_t a,int8x8_t b)16320 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
16321 return vrsra_n_s8(a, b, 1);
16322 }
16323
16324 // CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
16325 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
16326 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
16327 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
16328 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
16329 // CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
16330 // CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
16331 // CHECK: ret <4 x i16> [[VRSRA_N]]
test_vrsra_n_s16(int16x4_t a,int16x4_t b)16332 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
16333 return vrsra_n_s16(a, b, 1);
16334 }
16335
16336 // CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
16337 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
16338 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
16339 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
16340 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
16341 // CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
16342 // CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
16343 // CHECK: ret <2 x i32> [[VRSRA_N]]
test_vrsra_n_s32(int32x2_t a,int32x2_t b)16344 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
16345 return vrsra_n_s32(a, b, 1);
16346 }
16347
16348 // CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
16349 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
16350 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
16351 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
16352 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
16353 // CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
16354 // CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
16355 // CHECK: ret <1 x i64> [[VRSRA_N]]
test_vrsra_n_s64(int64x1_t a,int64x1_t b)16356 int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
16357 return vrsra_n_s64(a, b, 1);
16358 }
16359
16360 // CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
16361 // CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
16362 // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
16363 // CHECK: ret <8 x i8> [[VRSRA_N]]
test_vrsra_n_u8(uint8x8_t a,uint8x8_t b)16364 uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
16365 return vrsra_n_u8(a, b, 1);
16366 }
16367
16368 // CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
16369 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
16370 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
16371 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
16372 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
16373 // CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
16374 // CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
16375 // CHECK: ret <4 x i16> [[VRSRA_N]]
test_vrsra_n_u16(uint16x4_t a,uint16x4_t b)16376 uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
16377 return vrsra_n_u16(a, b, 1);
16378 }
16379
16380 // CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
16381 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
16382 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
16383 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
16384 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
16385 // CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
16386 // CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
16387 // CHECK: ret <2 x i32> [[VRSRA_N]]
test_vrsra_n_u32(uint32x2_t a,uint32x2_t b)16388 uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
16389 return vrsra_n_u32(a, b, 1);
16390 }
16391
16392 // CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
16393 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
16394 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
16395 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
16396 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
16397 // CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
16398 // CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
16399 // CHECK: ret <1 x i64> [[VRSRA_N]]
test_vrsra_n_u64(uint64x1_t a,uint64x1_t b)16400 uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
16401 return vrsra_n_u64(a, b, 1);
16402 }
16403
// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 1);
}

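// vrsubhn: rounding subtract and narrow, returning the high half of each
// difference. A per-lane model (editorial sketch, not checked here):
//   dst[i] = (a[i] - b[i] + (1 << (w/2 - 1))) >> (w/2)
// truncated to half width, where w is the source lane width in bits.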
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

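// vset_lane/vsetq_lane: replace a single lane of a vector with a scalar.
// Each variant lowers to one insertelement; the round-trip bitcasts through
// <8 x i8>/<16 x i8> come from the type-generic NEON header and carry no
// semantics of their own.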
// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 zeroext %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 zeroext %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 signext %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 signext %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 signext %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 signext %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK: ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 zeroext %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 zeroext %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 signext %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 signext %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 signext %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 signext %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 3);
}

// The optimizer is able to get rid of all moves now.
// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// The optimizer is able to get rid of all moves now.
// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}

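// vshl/vshlq: shift by a per-lane amount taken from a second register. The
// shift operand is signed even for the unsigned variants: as a semantics
// note (not verified by these checks), positive lanes shift left and
// negative lanes shift right.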
// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

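// vshll_n: widen each lane (sext for signed inputs, zext for unsigned) and
// then shift left by an immediate, so the result is plain IR with no NEON
// intrinsic call.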
// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}

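// vshl_n/vshlq_n: shift left by an immediate; this lowers directly to a
// vector shl with a splat constant.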
// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}

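// vshrn_n: shift right by an immediate, then narrow to half width. The
// signedness of the source only selects ashr vs. lshr ahead of the trunc.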
// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}

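// vshr_n/vshrq_n: shift right by an immediate; signed variants become ashr,
// unsigned variants become lshr, with no intrinsic call.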
// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}

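// vsli_n: shift left and insert. Per lane (editorial sketch, not checked by
// FileCheck): dst[i] = (b[i] << n) | (a[i] & ((1 << n) - 1)), i.e. the low n
// bits of a are kept and the rest comes from b shifted left. This maps to
// @llvm.arm.neon.vshiftins with a positive splat shift vector.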
// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17507 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
17508 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
17509 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
17510 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
17511 // CHECK: ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_s64(int64x2_t a,int64x2_t b)17512 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
17513 return vsliq_n_s64(a, b, 1);
17514 }
17515
17516 // CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
17517 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
17518 // CHECK: ret <16 x i8> [[VSLI_N]]
test_vsliq_n_u8(uint8x16_t a,uint8x16_t b)17519 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
17520 return vsliq_n_u8(a, b, 1);
17521 }
17522
17523 // CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
17524 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
17525 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
17526 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
17527 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
17528 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
17529 // CHECK: ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_u16(uint16x8_t a,uint16x8_t b)17530 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
17531 return vsliq_n_u16(a, b, 1);
17532 }
17533
17534 // CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
17535 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
17536 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
17537 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
17538 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
17539 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
17540 // CHECK: ret <4 x i32> [[VSLI_N2]]
test_vsliq_n_u32(uint32x4_t a,uint32x4_t b)17541 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
17542 return vsliq_n_u32(a, b, 1);
17543 }
17544
17545 // CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
17546 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17547 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
17548 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
17549 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
17550 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
17551 // CHECK: ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_u64(uint64x2_t a,uint64x2_t b)17552 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
17553 return vsliq_n_u64(a, b, 1);
17554 }
17555
17556 // CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
17557 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
17558 // CHECK: ret <16 x i8> [[VSLI_N]]
test_vsliq_n_p8(poly8x16_t a,poly8x16_t b)17559 poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
17560 return vsliq_n_p8(a, b, 1);
17561 }
17562
17563 // CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
17564 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
17565 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
17566 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
17567 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
17568 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
17569 // CHECK: ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_p16(poly16x8_t a,poly16x8_t b)17570 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
17571 return vsliq_n_p16(a, b, 1);
17572 }
17573
17574
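// vsra_n/vsraq_n accumulate a right-shifted vector into the first operand.
// Note in the checks below that these lower to plain IR (ashr or lshr for the
// signed/unsigned shift, followed by an add) rather than a NEON intrinsic call.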
// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 1);
}

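// vsri_n/vsriq_n (shift right and insert) reuse the same
// @llvm.arm.neon.vshiftins intrinsic as vsli_n above; the right shift is
// encoded as a negative shift amount, hence the all-minus-one shift vectors.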
// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 1);
}

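// vst1/vst1q lower to @llvm.arm.neon.vst1 through an i8* pointer; the trailing
// i32 argument is the element alignment (note the 64-bit cases below pass 4,
// matching this target's alignment for doubleword elements).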
// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_s8(int8_t * a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_s16(int16_t * a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_s32(int32_t * a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_s64(int64_t * a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_f16(float16_t * a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}

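// vst1_lane/vst1q_lane extract the requested lane and emit an ordinary scalar
// store. The one exception is the 64-bit q-form, which shuffles the lane out
// into a <1 x i64> and still stores it via @llvm.arm.neon.vst1.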
// CHECK-LABEL: define void @test_vst1q_lane_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: store float [[TMP3]], float* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
  vst1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
  vst1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
  vst1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
  vst1_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
  vst1_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
  vst1_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
  vst1_lane_u64(a, b, 0);
}

// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
  vst1_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
  vst1_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
  vst1_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
  vst1_lane_s64(a, b, 0);
}

// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
  vst1_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: store float [[TMP3]], float* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
  vst1_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
  vst1_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
  vst1_lane_p16(a, b, 3);
}

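// The multi-vector argument of vst2q is passed coerced as [4 x i64]. The
// checks below trace it being stored to an alloca, copied into a local with
// @llvm.memcpy, and the two vector fields reloaded before the
// @llvm.arm.neon.vst2 call.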
// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
  vst2q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}

// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
  vst2_u64(a, b);
}

// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2_s8(int8_t * a, int8x8x2_t b) {
  vst2_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2_s16(int16_t * a, int16x4x2_t b) {
  vst2_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2_s32(int32_t * a, int32x2x2_t b) {
  vst2_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2_s64(int64_t * a, int64x1x2_t b) {
  vst2_s64(a, b);
}

// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2_f16(float16_t * a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2_f32(float32_t * a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

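// The _lane variants follow; they store only the selected element from each
// register via llvm.arm.neon.vst2lane, whose second-to-last i32 operand is
// the lane index (the last being the alignment, as above).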
// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK: ret void
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
  vst2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK: ret void
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
  vst2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
  vst2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
// CHECK: ret void
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
  vst2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}

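// Same lane-store checks for the 64-bit (d-register) forms.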
19160 // CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x i64] %b.coerce) #0 {
19161 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
19162 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
19163 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
19164 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19165 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19166 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
19167 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
19168 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19169 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
19170 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
19171 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19172 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
19173 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19174 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19175 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
19176 // CHECK: ret void
19177 void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
19178 vst2_lane_u8(a, b, 7);
19179 }
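// For 8-bit element types no bitcasts are needed: the destination pointer
// is already i8* and the payload vectors are already <8 x i8>, so the
// loaded registers feed the intrinsic directly.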
19180
19181 // CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x i64] %b.coerce) #0 {
19182 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
19183 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
19184 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
19185 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
19186 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19187 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
19188 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
19189 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19190 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19191 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
19192 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
19193 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19194 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19195 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
19196 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19197 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19198 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19199 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19200 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19201 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
19202 // CHECK: ret void
19203 void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
19204 vst2_lane_u16(a, b, 3);
19205 }
19206
19207 // CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x i64] %b.coerce) #0 {
19208 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
19209 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
19210 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
19211 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
19212 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19213 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
19214 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
19215 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19216 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19217 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
19218 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
19219 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19220 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19221 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
19222 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19223 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19224 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19225 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19226 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19227 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
19228 // CHECK: ret void
19229 void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
19230 vst2_lane_u32(a, b, 1);
19231 }
19232
19233 // CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x i64] %b.coerce) #0 {
19234 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
19235 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
19236 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
19237 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19238 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19239 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
19240 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
19241 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19242 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
19243 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
19244 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19245 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
19246 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19247 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19248 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
19249 // CHECK: ret void
19250 void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
19251 vst2_lane_s8(a, b, 7);
19252 }
19253
19254 // CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x i64] %b.coerce) #0 {
19255 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
19256 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
19257 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
19258 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
19259 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19260 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
19261 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
19262 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19263 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19264 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
19265 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
19266 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19267 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19268 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
19269 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19270 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19271 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19272 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19273 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19274 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
19275 // CHECK: ret void
19276 void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
19277 vst2_lane_s16(a, b, 3);
19278 }
19279
19280 // CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x i64] %b.coerce) #0 {
19281 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
19282 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
19283 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
19284 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
19285 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19286 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
19287 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
19288 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19289 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19290 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
19291 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
19292 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19293 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19294 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
19295 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19296 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19297 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19298 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19299 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19300 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
19301 // CHECK: ret void
19302 void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
19303 vst2_lane_s32(a, b, 1);
19304 }
19305
19306 // CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x i64] %b.coerce) #0 {
19307 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
19308 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
19309 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
19310 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
19311 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19312 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
19313 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
19314 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19315 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
19316 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
19317 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
19318 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
19319 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
19320 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
19321 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
19322 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
19323 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
19324 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19325 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19326 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
19327 // CHECK: ret void
19328 void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
19329 vst2_lane_f16(a, b, 3);
19330 }
19331
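// There is no half-precision overload of the vst2lane intrinsic, so the
// <4 x half> elements above are routed through the <4 x i16> form via
// bitcasts; this is bit-preserving and therefore safe for f16 data.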
19332 // CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x i64] %b.coerce) #0 {
19333 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
19334 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
19335 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
19336 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
19337 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19338 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
19339 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
19340 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19341 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
19342 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
19343 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
19344 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
19345 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
19346 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
19347 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
19348 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
19349 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
19350 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
19351 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
19352 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
19353 // CHECK: ret void
19354 void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
19355 vst2_lane_f32(a, b, 1);
19356 }
19357
19358 // CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x i64] %b.coerce) #0 {
19359 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
19360 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
19361 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
19362 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19363 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19364 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
19365 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
19366 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19367 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
19368 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
19369 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19370 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
19371 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19372 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19373 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
19374 // CHECK: ret void
19375 void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
19376 vst2_lane_p8(a, b, 7);
19377 }
19378
19379 // CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x i64] %b.coerce) #0 {
19380 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
19381 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
19382 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
19383 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
19384 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19385 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
19386 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
19387 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
19388 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19389 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
19390 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
19391 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19392 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19393 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
19394 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19395 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19396 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19397 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19398 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19399 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
19400 // CHECK: ret void
19401 void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
19402 vst2_lane_p16(a, b, 3);
19403 }
19404
19405
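// The vst3 family stores three whole registers with 3-way interleaving:
// element i of each source vector is written to three consecutive memory
// slots before element i+1. These whole-vector forms take no lane index,
// so the single trailing i32 argument of the intrinsic is the pointer
// alignment in bytes.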
19406 // CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [6 x i64] %b.coerce) #0 {
19407 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
19408 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
19409 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
19410 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
19411 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19412 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
19413 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
19414 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19415 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
19416 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
19417 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
19418 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
19419 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
19420 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
19421 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
19422 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
19423 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
19424 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
19425 // CHECK: ret void
19426 void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
19427 vst3q_u8(a, b);
19428 }
19429
19430 // CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [6 x i64] %b.coerce) #0 {
19431 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
19432 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
19433 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
19434 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
19435 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19436 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
19437 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
19438 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19439 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19440 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
19441 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
19442 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
19443 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
19444 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
19445 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
19446 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
19447 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
19448 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
19449 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
19450 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
19451 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
19452 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
19453 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
19454 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
19455 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
19456 // CHECK: ret void
19457 void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
19458 vst3q_u16(a, b);
19459 }
19460
19461 // CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [6 x i64] %b.coerce) #0 {
19462 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
19463 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
19464 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
19465 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
19466 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19467 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
19468 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
19469 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19470 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19471 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
19472 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
19473 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
19474 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
19475 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
19476 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
19477 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
19478 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
19479 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
19480 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
19481 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
19482 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
19483 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
19484 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
19485 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
19486 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
19487 // CHECK: ret void
19488 void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
19489 vst3q_u32(a, b);
19490 }
19491
19492 // CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [6 x i64] %b.coerce) #0 {
19493 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
19494 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
19495 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
19496 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
19497 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19498 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
19499 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
19500 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19501 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
19502 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
19503 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
19504 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
19505 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
19506 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
19507 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
19508 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
19509 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
19510 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
19511 // CHECK: ret void
19512 void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
19513 vst3q_s8(a, b);
19514 }
19515
19516 // CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [6 x i64] %b.coerce) #0 {
19517 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
19518 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
19519 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
19520 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
19521 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19522 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
19523 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
19524 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19525 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19526 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
19527 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
19528 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
19529 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
19530 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
19531 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
19532 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
19533 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
19534 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
19535 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
19536 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
19537 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
19538 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
19539 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
19540 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
19541 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
19542 // CHECK: ret void
19543 void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
19544 vst3q_s16(a, b);
19545 }
19546
19547 // CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [6 x i64] %b.coerce) #0 {
19548 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
19549 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
19550 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
19551 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
19552 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19553 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
19554 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
19555 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19556 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19557 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
19558 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
19559 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
19560 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
19561 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
19562 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
19563 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
19564 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
19565 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
19566 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
19567 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
19568 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
19569 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
19570 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
19571 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
19572 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
19573 // CHECK: ret void
19574 void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
19575 vst3q_s32(a, b);
19576 }
19577
19578 // CHECK-LABEL: define void @test_vst3q_f16(half* %a, [6 x i64] %b.coerce) #0 {
19579 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
19580 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
19581 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
19582 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
19583 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19584 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
19585 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
19586 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19587 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
19588 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
19589 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
19590 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
19591 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
19592 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
19593 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
19594 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
19595 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
19596 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
19597 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
19598 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
19599 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
19600 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
19601 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
19602 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
19603 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
19604 // CHECK: ret void
19605 void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
19606 vst3q_f16(a, b);
19607 }
19608
19609 // CHECK-LABEL: define void @test_vst3q_f32(float* %a, [6 x i64] %b.coerce) #0 {
19610 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
19611 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
19612 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
19613 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
19614 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19615 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
19616 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
19617 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19618 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
19619 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
19620 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
19621 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
19622 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
19623 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
19624 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
19625 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
19626 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
19627 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
19628 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
19629 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
19630 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
19631 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
19632 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
19633 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
19634 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
19635 // CHECK: ret void
19636 void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
19637 vst3q_f32(a, b);
19638 }
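// Illustrative sketch (hypothetical helper, not a FileCheck-verified test):
// a typical use of vst3q_f32 is writing three planar registers back out as
// interleaved array-of-structs data, e.g. packed RGB floats.
static void example_store_rgb_f32(float32_t * dst, float32x4_t r,
                                  float32x4_t g, float32x4_t b) {
  float32x4x3_t rgb;
  rgb.val[0] = r;
  rgb.val[1] = g;
  rgb.val[2] = b;
  // Writes r0,g0,b0,r1,g1,b1,...,r3,g3,b3: 12 contiguous floats at dst.
  vst3q_f32(dst, rgb);
}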
19639
19640 // CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [6 x i64] %b.coerce) #0 {
19641 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
19642 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
19643 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
19644 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
19645 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19646 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
19647 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
19648 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19649 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
19650 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
19651 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
19652 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
19653 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
19654 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
19655 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
19656 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
19657 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
19658 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
19659 // CHECK: ret void
19660 void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
19661 vst3q_p8(a, b);
19662 }
19663
19664 // CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [6 x i64] %b.coerce) #0 {
19665 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
19666 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
19667 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
19668 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
19669 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
19670 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
19671 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
19672 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
19673 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19674 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
19675 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
19676 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
19677 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
19678 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
19679 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
19680 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
19681 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
19682 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
19683 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
19684 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
19685 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
19686 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
19687 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
19688 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
19689 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
19690 // CHECK: ret void
19691 void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
19692 vst3q_p16(a, b);
19693 }
19694
19695 // CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x i64] %b.coerce) #0 {
19696 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
19697 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
19698 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
19699 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19700 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19701 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
19702 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
19703 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19704 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
19705 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
19706 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19707 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
19708 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19709 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19710 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
19711 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19712 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19713 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
19714 // CHECK: ret void
19715 void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
19716 vst3_u8(a, b);
19717 }
19718
19719 // CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x i64] %b.coerce) #0 {
19720 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
19721 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
19722 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
19723 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
19724 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19725 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
19726 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
19727 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19728 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19729 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
19730 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
19731 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19732 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19733 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
19734 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19735 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19736 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19737 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
19738 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19739 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19740 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19741 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19742 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19743 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19744 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
19745 // CHECK: ret void
19746 void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
19747 vst3_u16(a, b);
19748 }
19749
19750 // CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x i64] %b.coerce) #0 {
19751 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
19752 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
19753 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
19754 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
19755 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19756 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
19757 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
19758 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19759 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19760 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
19761 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
19762 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19763 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19764 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
19765 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19766 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19767 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19768 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
19769 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
19770 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
19771 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
19772 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19773 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19774 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
19775 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
19776 // CHECK: ret void
19777 void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
19778 vst3_u32(a, b);
19779 }
19780
19781 // CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x i64] %b.coerce) #0 {
19782 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
19783 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
19784 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
19785 // CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
19786 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19787 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
19788 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
19789 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19790 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
19791 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
19792 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
19793 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
19794 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
19795 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
19796 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
19797 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
19798 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
19799 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
19800 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
19801 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
19802 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
19803 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
19804 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
19805 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
19806 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
19807 // CHECK: ret void
19808 void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
19809 vst3_u64(a, b);
19810 }
19811
19812 // CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x i64] %b.coerce) #0 {
19813 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
19814 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
19815 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
19816 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19817 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19818 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
19819 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
19820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19821 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
19822 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
19823 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19824 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
19825 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19826 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19827 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
19828 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19829 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19830 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
19831 // CHECK: ret void
19832 void test_vst3_s8(int8_t * a, int8x8x3_t b) {
19833 vst3_s8(a, b);
19834 }
19835
19836 // CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x i64] %b.coerce) #0 {
19837 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
19838 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
19839 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
19840 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
19841 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19842 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
19843 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
19844 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19845 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19846 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
19847 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
19848 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19849 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19850 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
19851 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19852 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19853 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19854 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
19855 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19856 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19857 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19858 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19859 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19860 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19861 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
19862 // CHECK: ret void
19863 void test_vst3_s16(int16_t * a, int16x4x3_t b) {
19864 vst3_s16(a, b);
19865 }
19866
19867 // CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x i64] %b.coerce) #0 {
19868 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
19869 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
19870 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
19871 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
19872 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19873 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
19874 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
19875 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19876 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19877 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
19878 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
19879 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19880 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19881 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
19882 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19883 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19884 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19885 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
19886 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
19887 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
19888 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
19889 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19890 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19891 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
19892 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
19893 // CHECK: ret void
test_vst3_s32(int32_t * a,int32x2x3_t b)19894 void test_vst3_s32(int32_t * a, int32x2x3_t b) {
19895 vst3_s32(a, b);
19896 }
19897
19898 // CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x i64] %b.coerce) #0 {
19899 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
19900 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
19901 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
19902 // CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
19903 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19904 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
19905 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
19906 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19907 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
19908 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
19909 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
19910 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
19911 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
19912 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
19913 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
19914 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
19915 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
19916 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
19917 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
19918 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
19919 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
19920 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
19921 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
19922 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
19923 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
19924 // CHECK: ret void
test_vst3_s64(int64_t * a,int64x1x3_t b)19925 void test_vst3_s64(int64_t * a, int64x1x3_t b) {
19926 vst3_s64(a, b);
19927 }
19928
19929 // CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x i64] %b.coerce) #0 {
19930 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
19931 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
19932 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
19933 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
19934 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19935 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
19936 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
19937 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19938 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
19939 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
19940 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
19941 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
19942 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
19943 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
19944 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
19945 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
19946 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
19947 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
19948 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
19949 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
19950 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
19951 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19952 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19953 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19954 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
19955 // CHECK: ret void
test_vst3_f16(float16_t * a,float16x4x3_t b)19956 void test_vst3_f16(float16_t * a, float16x4x3_t b) {
19957 vst3_f16(a, b);
19958 }
19959
19960 // CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x i64] %b.coerce) #0 {
19961 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
19962 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
19963 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
19964 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
19965 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19966 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
19967 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
19968 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
19969 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
19970 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
19971 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
19972 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
19973 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
19974 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
19975 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
19976 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
19977 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
19978 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
19979 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
19980 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
19981 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
19982 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
19983 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
19984 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
19985 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
19986 // CHECK: ret void
test_vst3_f32(float32_t * a,float32x2x3_t b)19987 void test_vst3_f32(float32_t * a, float32x2x3_t b) {
19988 vst3_f32(a, b);
19989 }
19990
19991 // CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x i64] %b.coerce) #0 {
19992 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
19993 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
19994 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
19995 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19996 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
19997 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
19998 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
19999 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
20000 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
20001 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
20002 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
20003 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
20004 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
20005 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
20006 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
20007 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
20008 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
20009 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
20010 // CHECK: ret void
test_vst3_p8(poly8_t * a,poly8x8x3_t b)20011 void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
20012 vst3_p8(a, b);
20013 }
20014
20015 // CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x i64] %b.coerce) #0 {
20016 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
20017 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
20018 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
20019 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
20020 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20021 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
20022 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
20023 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
20024 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
20025 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
20026 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
20027 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
20028 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
20029 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
20030 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
20031 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
20032 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
20033 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
20034 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
20035 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
20036 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
20037 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
20038 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
20039 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
20040 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
20041 // CHECK: ret void
test_vst3_p16(poly16_t * a,poly16x4x3_t b)20042 void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
20043 vst3_p16(a, b);
20044 }
20045
20046
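// The *_lane tests below check @llvm.arm.neon.vst3lane, whose last two i32
// operands are the lane index and the alignment in bytes. A minimal usage
// sketch (hypothetical caller code, not part of the checked IR):
//
//   uint16x8x3_t v = vld3q_u16(src);  /* src: const uint16_t * */
//   vst3q_lane_u16(dst, v, 7);        /* store lane 7 of each register */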
// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
  vst3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
  vst3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
  vst3q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
  vst3q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
  vst3_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
  vst3_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
  vst3_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
}


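// The vst4 tests that follow repeat the same lowering pattern with a fourth
// vector: q-register structs arrive as [8 x i64] %b.coerce and are copied
// with a 64-byte memcpy before the @llvm.arm.neon.vst4 call. A minimal usage
// sketch (hypothetical caller code, not part of the checked IR):
//
//   uint8x16x4_t v = vld4q_u8(src);  /* src: const uint8_t * */
//   vst4q_u8(dst, v);                /* interleaved store of four registers */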
// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

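// vst4: 64-bit (d-register) variants of the same 4-element interleaved
// stores, including the single-element <1 x i64> forms for the u64/s64 types.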
20887 // CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x i64] %b.coerce) #0 {
20888 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
20889 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
20890 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20891 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20892 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20893 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
20894 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
20895 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
20896 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
20897 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
20898 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
20899 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
20900 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
20901 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
20902 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
20903 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
20904 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
20905 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
20906 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
20907 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
20908 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
20909 // CHECK: ret void
test_vst4_u8(uint8_t * a,uint8x8x4_t b)20910 void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
20911 vst4_u8(a, b);
20912 }
20913
20914 // CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x i64] %b.coerce) #0 {
20915 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
20916 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
20917 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
20918 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
20919 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20920 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
20921 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
20922 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
20923 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
20924 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
20925 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
20926 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
20927 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
20928 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
20929 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
20930 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
20931 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
20932 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
20933 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
20934 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
20935 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
20936 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
20937 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
20938 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
20939 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
20940 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
20941 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
20942 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
20943 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
20944 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
20945 // CHECK: ret void
test_vst4_u16(uint16_t * a,uint16x4x4_t b)20946 void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
20947 vst4_u16(a, b);
20948 }
20949
20950 // CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x i64] %b.coerce) #0 {
20951 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
20952 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
20953 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
20954 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
20955 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20956 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
20957 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
20958 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
20959 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
20960 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
20961 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
20962 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
20963 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
20964 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
20965 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
20966 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
20967 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
20968 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
20969 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
20970 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
20971 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
20972 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
20973 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
20974 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
20975 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
20976 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
20977 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
20978 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
20979 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
20980 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
20981 // CHECK: ret void
test_vst4_u32(uint32_t * a,uint32x2x4_t b)20982 void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
20983 vst4_u32(a, b);
20984 }
20985
20986 // CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x i64] %b.coerce) #0 {
20987 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
20988 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
20989 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
20990 // CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
20991 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20992 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
20993 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
20994 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
20995 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
20996 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
20997 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
20998 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
20999 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
21000 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
21001 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
21002 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
21003 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
21004 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
21005 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
21006 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
21007 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
21008 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
21009 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
21010 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
21011 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
21012 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
21013 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
21014 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
21015 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
21016 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
21017 // CHECK: ret void
test_vst4_u64(uint64_t * a,uint64x1x4_t b)21018 void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
21019 vst4_u64(a, b);
21020 }
21021
21022 // CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x i64] %b.coerce) #0 {
21023 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
21024 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
21025 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
21026 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
21027 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
21028 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
21029 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
21030 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
21031 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
21032 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
21033 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
21034 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
21035 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
21036 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
21037 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
21038 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
21039 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
21040 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
21041 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
21042 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
21043 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
21044 // CHECK: ret void
test_vst4_s8(int8_t * a,int8x8x4_t b)21045 void test_vst4_s8(int8_t * a, int8x8x4_t b) {
21046 vst4_s8(a, b);
21047 }
21048
21049 // CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x i64] %b.coerce) #0 {
21050 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
21051 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
21052 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
21053 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
21054 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
21055 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
21056 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
21057 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
21058 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
21059 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
21060 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
21061 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
21062 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
21063 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
21064 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
21065 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
21066 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
21067 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
21068 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
21069 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
21070 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
21071 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
21072 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
21073 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
21074 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
21075 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
21076 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
21077 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
21078 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
21079 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
21080 // CHECK: ret void
test_vst4_s16(int16_t * a,int16x4x4_t b)21081 void test_vst4_s16(int16_t * a, int16x4x4_t b) {
21082 vst4_s16(a, b);
21083 }
21084
21085 // CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x i64] %b.coerce) #0 {
21086 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
21087 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
21088 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
21089 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
21090 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
21091 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
21092 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
21093 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
21094 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
21095 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
21096 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
21097 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
21098 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
21099 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
21100 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
21101 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
21102 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
21103 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
21104 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
21105 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
21106 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
21107 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
21108 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
21109 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
21110 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
21111 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
21112 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
21113 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
21114 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
21115 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
21116 // CHECK: ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)21117 void test_vst4_s32(int32_t * a, int32x2x4_t b) {
21118 vst4_s32(a, b);
21119 }
21120
21121 // CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x i64] %b.coerce) #0 {
21122 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
21123 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
21124 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
21125 // CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
21126 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
21127 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
21128 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
21129 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
21130 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
21131 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
21132 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
21133 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
21134 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
21135 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
21136 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
21137 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
21138 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
21139 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
21140 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
21141 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
21142 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
21143 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
21144 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
21145 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
21146 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
21147 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
21148 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
21149 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
21150 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
21151 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
21152 // CHECK: ret void
test_vst4_s64(int64_t * a,int64x1x4_t b)21153 void test_vst4_s64(int64_t * a, int64x1x4_t b) {
21154 vst4_s64(a, b);
21155 }
21156
21157 // CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x i64] %b.coerce) #0 {
21158 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
21159 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
21160 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
21161 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
21162 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
21163 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
21164 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
21165 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
21166 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
21167 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
21168 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
21169 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
21170 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
21171 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
21172 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
21173 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
21174 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
21175 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
21176 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
21177 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
21178 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
21179 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
21180 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
21181 // CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
21182 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
21183 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
21184 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
21185 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
21186 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
21187 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
21188 // CHECK: ret void
test_vst4_f16(float16_t * a,float16x4x4_t b)21189 void test_vst4_f16(float16_t * a, float16x4x4_t b) {
21190 vst4_f16(a, b);
21191 }
21192
21193 // CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x i64] %b.coerce) #0 {
21194 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
21195 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
21196 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
21197 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
21198 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
21199 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
21200 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
21201 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
21202 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
21203 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
21204 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
21205 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
21206 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
21207 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
21208 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
21209 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
21210 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
21211 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
21212 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_f32(float32_t * a, float32x2x4_t b) {
  vst4_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_p16(a, b);
}

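// The checks above all follow the vst4 pattern: the four member vectors are
// stored interleaved, one element from each member per group of four. A
// scalar reference sketch (illustration only; ref_vst4_f32 is a made-up
// helper and is not part of the checked output):
void ref_vst4_f32(float32_t * a, float32x2x4_t b) {
  for (int i = 0; i < 2; ++i)       // lane index within each 64-bit vector
    for (int j = 0; j < 4; ++j)     // which of the four member vectors
      a[4 * i + j] = b.val[j][i];   // group i holds lane i of every member
}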

// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
}

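// Unlike vst4, the vst4lane intrinsic stores exactly one lane from each of
// the four member vectors: four contiguous scalars, with the lane index as
// the second-to-last call argument above. A rough scalar model
// (ref_vst4q_lane_u16 is a hypothetical helper; the real intrinsic requires
// a compile-time-constant lane):
void ref_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b, int lane) {
  for (int j = 0; j < 4; ++j)
    a[j] = b.val[j][lane];   // one element per member, no interleaving loop
}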
// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
  vst4q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
  vst4q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
  vst4q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_lane_u8(a, b, 7);
}

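// Note that for the 8-bit element variants such as test_vst4_lane_u8 above
// there is no pointer bitcast before the call: %a is already an i8*, so it
// feeds the intrinsic directly, and the trailing alignment argument is 1.
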
// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK: ret void
void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
  vst4_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
  vst4_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK: ret void
void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
  vst4_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
  vst4_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
// CHECK: ret void
void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
  vst4_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_lane_p16(a, b, 3);
}


// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
  return vsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
  return vsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
  return vsub_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
  return vsub_s64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
  return vsub_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
  return vsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
  return vsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
  return vsub_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
  return vsub_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
  return vsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
  return vsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
  return vsubq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
  return vsubq_s64(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
  return vsubq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vsubq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vsubq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vsubq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vsubq_u64(a, b);
}

22005
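// vsubhn ("subtract and narrow, high half"): subtract, shift each wide element
// down by half its width, and truncate to the narrow element type.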
// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

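// vsubl ("subtract long"): sign- or zero-extend both narrow operands to double
// width, then subtract.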
// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

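// vsubw ("subtract wide"): extend only the narrow second operand, then
// subtract it from the already-wide first operand.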
// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

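// vtbl1-vtbl4: byte-wise table lookup. Each byte of the index vector b selects
// a byte from the 8/16/24/32-byte table; out-of-range indices yield 0.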
// CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL1_I]]
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL1_I]]
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL1_I]]
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}

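// For vtbl2/vtbl3/vtbl4 the table argument is a homogeneous aggregate
// (uint8x8x2_t etc.) that this ABI passes coerced to an [N x i64] array, so
// the IR below round-trips it through allocas before extracting the table
// registers for the intrinsic call.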
// CHECK-LABEL: define <8 x i8> @test_vtbl2_u8([2 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL2_I]]
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl2_s8([2 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL2_I]]
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl2_p8([2 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL2_I]]
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl3_u8([3 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL3_I]]
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl3_s8([3 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL3_I]]
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl3_p8([3 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL3_I]]
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl4_u8([4 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL4_I]]
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl4_s8([4 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL4_I]]
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl4_p8([4 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL4_I]]
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}

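// vtbx1-vtbx4: table lookup with extension. As vtbl, except that bytes whose
// index is out of range keep the corresponding byte of the fallback operand a.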
// CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX1_I]]
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX1_I]]
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX1_I]]
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX2_I]]
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX2_I]]
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX2_I]]
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX3_I]]
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX3_I]]
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX3_I]]
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}

22658 // CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 {
22659 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
22660 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
22661 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
22662 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
22663 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
22664 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
22665 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
22666 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
22667 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
22668 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
22669 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
22670 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
22671 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
22672 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
22673 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
22674 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
22675 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
22676 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
22677 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
22678 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
22679 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
22680 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
22681 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
22682 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4
22683 // CHECK: ret <8 x i8> [[VTBX4_I]]
test_vtbx4_u8(uint8x8_t a,uint8x8x4_t b,uint8x8_t c)22684 uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
22685 return vtbx4_u8(a, b, c);
22686 }
22687
22688 // CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 {
22689 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
22690 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
22691 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
22692 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
22693 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX4_I]]
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
  return vtbx4_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX4_I]]
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}

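// Illustrative usage sketch for the vtbx4 tests above (not matched by any
// CHECK line; the helper name is hypothetical). vtbx4 does a byte-wise
// lookup into a 32-byte table held in the four vectors of an int8x8x4_t:
// an index lane in [0, 31] selects table.val[idx / 8][idx % 8], and any
// out-of-range index leaves the corresponding lane of the fallback vector
// unchanged.
static int8x8_t vtbx4_usage_sketch(int8x8_t fallback, int8x8x4_t table,
                                   int8x8_t idx) {
  // Lanes of idx >= 32 keep the value already present in `fallback`.
  return vtbx4_s8(fallback, table, idx);
}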
// CHECK-LABEL: define void @test_vtrn_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
  return vtrn_s8(a, b);
}

// CHECK-LABEL: define void @test_vtrn_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
  return vtrn_s16(a, b);
}

// CHECK-LABEL: define void @test_vtrn_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
  return vtrn_s32(a, b);
}

// CHECK-LABEL: define void @test_vtrn_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn_u8(a, b);
}

// CHECK-LABEL: define void @test_vtrn_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn_u16(a, b);
}

// CHECK-LABEL: define void @test_vtrn_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn_u32(a, b);
}

// CHECK-LABEL: define void @test_vtrn_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
  return vtrn_f32(a, b);
}

// CHECK-LABEL: define void @test_vtrn_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn_p8(a, b);
}

// CHECK-LABEL: define void @test_vtrn_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
  return vtrn_p16(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
  return vtrnq_s8(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
  return vtrnq_s16(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
  return vtrnq_s32(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
  return vtrnq_u8(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
  return vtrnq_u16(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
  return vtrnq_u32(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
  return vtrnq_f32(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
  return vtrnq_p8(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
  return vtrnq_p16(a, b);
}

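// End of the vtrn tests. As a usage sketch (illustrative only; the helper
// name is hypothetical and nothing here is matched by CHECK lines), vtrn
// treats its operands as rows of 2x2 element blocks and transposes them:
// val[0] holds the interleaved even lanes and val[1] the interleaved odd
// lanes, exactly as the shufflevector masks above encode.
static int8x8x2_t vtrn_usage_sketch(int8x8_t a, int8x8_t b) {
  int8x8x2_t r = vtrn_s8(a, b);
  // r.val[0] = {a0,b0,a2,b2,a4,b4,a6,b6}
  // r.val[1] = {a1,b1,a3,b3,a5,b5,a7,b7}
  return r;
}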
// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
  return vtst_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
  return vtst_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
  return vtst_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
  return vtst_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
  return vtst_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
  return vtst_u32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
  return vtst_p8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
  return vtst_p16(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
  return vtstq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
  return vtstq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
  return vtstq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
  return vtstq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
  return vtstq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
  return vtstq_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
  return vtstq_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
  return vtstq_p16(a, b);
}

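// End of the vtst tests. The checked IR pattern is always the same three
// steps: bitwise AND, compare-not-equal against zero, then sign-extend the
// i1 lanes to all-ones/all-zero masks. A minimal sketch of the semantics
// (hypothetical helper, not matched by any CHECK line):
static uint8x8_t vtst_usage_sketch(uint8x8_t a, uint8x8_t b) {
  // Each result lane becomes 0xFF when (a & b) has any bit set in that
  // lane, and 0x00 otherwise.
  return vtst_u8(a, b);
}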
// CHECK-LABEL: define void @test_vuzp_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
  return vuzp_s8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
  return vuzp_s16(a, b);
}

// CHECK-LABEL: define void @test_vuzp_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
  return vuzp_s32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp_u8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp_u16(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp_u32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
  return vuzp_f32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp_p8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp_p16(a, b);
}

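// The 64-bit vuzp tests above all lower to a pair of shufflevectors that
// deinterleave the even and odd lanes of the concatenated inputs. As an
// illustrative sketch (hypothetical helper, not matched by any CHECK line):
static int8x8x2_t vuzp_usage_sketch(int8x8_t a, int8x8_t b) {
  int8x8x2_t r = vuzp_s8(a, b);
  // r.val[0] = {a0,a2,a4,a6,b0,b2,b4,b6}  (even lanes)
  // r.val[1] = {a1,a3,a5,a7,b1,b3,b5,b7}  (odd lanes)
  return r;
}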
// CHECK-LABEL: define void @test_vuzpq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
23587 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
23588 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
23589 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
23590 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
23591 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
23592 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
23593 // CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
23594 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
23595 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
23596 // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
23597 // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
23598 // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
23599 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
23600 // CHECK: ret void
test_vuzpq_f32(float32x4_t a,float32x4_t b)23601 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
23602 return vuzpq_f32(a, b);
23603 }
23604
23605 // CHECK-LABEL: define void @test_vuzpq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
23606 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
23607 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
23608 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
23609 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
23610 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
23611 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
23612 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
23613 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
23614 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
23615 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
23616 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
23617 // CHECK: ret void
test_vuzpq_p8(poly8x16_t a,poly8x16_t b)23618 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
23619 return vuzpq_p8(a, b);
23620 }
23621
23622 // CHECK-LABEL: define void @test_vuzpq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
23623 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
23624 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
23625 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
23626 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
23627 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
23628 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
23629 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
23630 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
23631 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
23632 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
23633 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
23634 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
23635 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
23636 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
23637 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
23638 // CHECK: ret void
test_vuzpq_p16(poly16x8_t a,poly16x8_t b)23639 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
23640 return vuzpq_p16(a, b);
23641 }
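
// Illustrative sketch, not part of the verified test (it has no CHECK
// lines and the helper name is hypothetical): vuzpq_* de-interleaves two
// q-registers. The even-indexed lanes of the concatenation {a, b} land in
// .val[0] and the odd-indexed lanes in .val[1], which is exactly the pair
// of <0,2,4,...> / <1,3,5,...> shufflevector masks checked above. Being
// static inline and unused, it emits no IR under -emit-llvm.
static inline int16x8_t demo_uzpq_even_s16(int16x8_t a, int16x8_t b) {
  int16x8x2_t r = vuzpq_s16(a, b); // r.val[0]: even lanes, r.val[1]: odd lanes
  return r.val[0];
}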


// CHECK-LABEL: define void @test_vzip_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
  return vzip_s8(a, b);
}

// CHECK-LABEL: define void @test_vzip_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
  return vzip_s16(a, b);
}

// CHECK-LABEL: define void @test_vzip_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
  return vzip_s32(a, b);
}

// CHECK-LABEL: define void @test_vzip_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  return vzip_u8(a, b);
}

// CHECK-LABEL: define void @test_vzip_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
  return vzip_u16(a, b);
}

// CHECK-LABEL: define void @test_vzip_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  return vzip_u32(a, b);
}

// CHECK-LABEL: define void @test_vzip_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}

// CHECK-LABEL: define void @test_vzip_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}

// CHECK-LABEL: define void @test_vzip_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}
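
// Illustrative sketch, unchecked and with a hypothetical helper name:
// vzip_* interleaves the lanes of two d-registers. For 4x16 inputs,
// .val[0] holds a0,b0,a1,b1 and .val[1] holds a2,b2,a3,b3, matching the
// <0,4,1,5> and <2,6,3,7> masks in the vzip_s16/u16/p16 checks above.
// The two halves come back through the sret struct as two <4 x i16> slots.
static inline int16x4_t demo_zip_low_s16(int16x4_t a, int16x4_t b) {
  int16x4x2_t r = vzip_s16(a, b); // r.val[0] = a0,b0,a1,b1
  return r.val[0];
}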

// CHECK-LABEL: define void @test_vzipq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}

// CHECK-LABEL: define void @test_vzipq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}

// CHECK-LABEL: define void @test_vzipq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}

// CHECK-LABEL: define void @test_vzipq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}

// CHECK-LABEL: define void @test_vzipq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}

// CHECK-LABEL: define void @test_vzipq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}

// CHECK-LABEL: define void @test_vzipq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
  return vzipq_f32(a, b);
}

// CHECK-LABEL: define void @test_vzipq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
  return vzipq_p8(a, b);
}

// CHECK-LABEL: define void @test_vzipq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
  return vzipq_p16(a, b);
}
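
// Illustrative round-trip sketch (unchecked; helper name hypothetical):
// uzp is the inverse of zip, so de-interleaving the zipped pair recovers
// the original operands lane-for-lane: u.val[0] == a and u.val[1] == b.
static inline int16x8_t demo_zip_then_uzp_s16(int16x8_t a, int16x8_t b) {
  int16x8x2_t z = vzipq_s16(a, b);               // interleave a and b
  int16x8x2_t u = vuzpq_s16(z.val[0], z.val[1]); // de-interleave again
  return u.val[0];                               // equal to a
}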